{ "module": "mail_intake.ingestion.reader", "content": { "path": "mail_intake.ingestion.reader", "docstring": "# Summary\n\nHigh-level mail ingestion orchestration for Mail Intake.\n\nThis module provides the primary, provider-agnostic entry point for\nreading and processing mail data.\n\nIt coordinates:\n\n- Mail adapter access.\n- Message and thread iteration.\n- Header and body parsing.\n- Normalization and model construction.\n\nNo provider-specific logic or API semantics are permitted in this layer.", "objects": { "datetime": { "name": "datetime", "kind": "alias", "path": "mail_intake.ingestion.reader.datetime", "signature": "", "docstring": null }, "Iterator": { "name": "Iterator", "kind": "alias", "path": "mail_intake.ingestion.reader.Iterator", "signature": "", "docstring": null }, "Dict": { "name": "Dict", "kind": "alias", "path": "mail_intake.ingestion.reader.Dict", "signature": "", "docstring": null }, "Any": { "name": "Any", "kind": "alias", "path": "mail_intake.ingestion.reader.Any", "signature": "", "docstring": null }, "MailIntakeAdapter": { "name": "MailIntakeAdapter", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeAdapter", "signature": "", "docstring": "Base adapter interface for mail providers.\n\nNotes:\n **Guarantees:**\n\n - Discover messages matching a query.\n - Retrieve full message payloads.\n - Retrieve full thread payloads.\n\n **Lifecycle:**\n\n - Adapters are intentionally read-only and must not mutate provider state.", "members": { "iter_message_refs": { "name": "iter_message_refs", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.iter_message_refs", "signature": "", "docstring": "Iterate over lightweight message references matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n Dict[str, str]:\n Dictionaries containing message and thread identifiers.\n\nNotes:\n **Guarantees:**\n\n - Implementations must yield dictionaries containing 
at least\n `message_id` and `thread_id`.\n\nExample:\n Typical yield:\n\n ```python\n {\n \"message_id\": \"...\",\n \"thread_id\": \"...\"\n }\n ```" }, "fetch_message": { "name": "fetch_message", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_message", "signature": "", "docstring": "Fetch a full raw message by message identifier.\n\nArgs:\n message_id (str):\n Provider-specific message identifier.\n\nReturns:\n Dict[str, Any]:\n Provider-native message payload (e.g., Gmail message JSON structure)." }, "fetch_thread": { "name": "fetch_thread", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_thread", "signature": "", "docstring": "Fetch a full raw thread by thread identifier.\n\nArgs:\n thread_id (str):\n Provider-specific thread identifier.\n\nReturns:\n Dict[str, Any]:\n Provider-native thread payload." } } }, "MailIntakeMessage": { "name": "MailIntakeMessage", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeMessage", "signature": "", "docstring": "Canonical internal representation of a single email message.\n\nNotes:\n **Guarantees:**\n\n - This model represents a fully parsed and normalized email message.\n - It is intentionally provider-agnostic and suitable for\n persistence, indexing, and downstream processing.\n\n **Constraints:**\n\n - No provider-specific identifiers, payloads, or API semantics\n should appear in this model.", "members": { "message_id": { "name": "message_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.message_id", "signature": "", "docstring": "Provider-specific message identifier." }, "thread_id": { "name": "thread_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.thread_id", "signature": "", "docstring": "Provider-specific thread identifier to which this message belongs." 
}, "timestamp": { "name": "timestamp", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.timestamp", "signature": "", "docstring": "Message timestamp as a timezone-naive UTC datetime." }, "from_email": { "name": "from_email", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.from_email", "signature": "", "docstring": "Sender email address." }, "from_name": { "name": "from_name", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.from_name", "signature": "", "docstring": "Optional human-readable sender name." }, "subject": { "name": "subject", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.subject", "signature": "", "docstring": "Raw subject line of the message." }, "body_text": { "name": "body_text", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.body_text", "signature": "", "docstring": "Extracted plain-text body content of the message." }, "snippet": { "name": "snippet", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.snippet", "signature": "", "docstring": "Short provider-supplied preview snippet of the message." }, "raw_headers": { "name": "raw_headers", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.raw_headers", "signature": "", "docstring": "Normalized mapping of message headers (header name → value)." 
} } }, "MailIntakeThread": { "name": "MailIntakeThread", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeThread", "signature": "", "docstring": "Canonical internal representation of an email thread.\n\nNotes:\n **Guarantees:**\n\n - A thread groups multiple related messages under a single subject\n and participant set.\n - It is designed to support reasoning over conversational context\n such as job applications, interviews, follow-ups, and ongoing discussions.\n - This model is provider-agnostic and safe to persist.", "members": { "thread_id": { "name": "thread_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.thread_id", "signature": "", "docstring": "Provider-specific thread identifier." }, "normalized_subject": { "name": "normalized_subject", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.normalized_subject", "signature": "", "docstring": "Normalized subject line used to group related messages." }, "participants": { "name": "participants", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.participants", "signature": "", "docstring": "Set of unique participant email addresses observed in the thread." }, "messages": { "name": "messages", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.messages", "signature": "", "docstring": "Ordered list of messages belonging to this thread." }, "last_activity_at": { "name": "last_activity_at", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.last_activity_at", "signature": "", "docstring": "Timestamp of the most recent message in the thread." 
}, "add_message": { "name": "add_message", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeThread.add_message", "signature": "", "docstring": "Add a message to the thread and update derived fields.\n\nArgs:\n message (MailIntakeMessage):\n Parsed mail message to add to the thread.\n\nNotes:\n **Responsibilities:**\n\n - Appends the message to the thread.\n - Tracks unique participants.\n - Updates the last activity timestamp." } } }, "parse_headers": { "name": "parse_headers", "kind": "function", "path": "mail_intake.ingestion.reader.parse_headers", "signature": "", "docstring": "Convert a list of Gmail-style headers into a normalized dict.\n\nArgs:\n raw_headers (List[Dict[str, str]]):\n List of header dictionaries, each containing `name` and `value` keys.\n\nReturns:\n Dict[str, str]:\n Dictionary mapping lowercase header names to stripped values.\n\nNotes:\n **Guarantees:**\n\n - Provider payloads (such as Gmail) typically represent headers as a\n list of name/value mappings.\n - This function normalizes them into a case-insensitive dictionary\n keyed by lowercase header names.\n\nExample:\n Typical usage:\n\n ```python\n Input:\n [\n {\"name\": \"From\", \"value\": \"John Doe <john@example.com>\"},\n {\"name\": \"Subject\", \"value\": \"Re: Interview Update\"},\n ]\n\n Output:\n {\n \"from\": \"John Doe <john@example.com>\",\n \"subject\": \"Re: Interview Update\",\n }\n ```" }, "extract_sender": { "name": "extract_sender", "kind": "function", "path": "mail_intake.ingestion.reader.extract_sender", "signature": "", "docstring": "Extract sender email and optional display name from headers.\n\nArgs:\n headers (Dict[str, str]):\n Normalized header dictionary as returned by `parse_headers()`.\n\nReturns:\n Tuple[str, Optional[str]]:\n A tuple `(email, name)` where `email` is the sender email address\n and `name` is the display name, or `None` if unavailable.\n\nNotes:\n **Responsibilities:**\n\n - This function parses the `From` header and attempts to extract\n sender email address 
and optional human-readable display name.\n\nExample:\n Typical values:\n\n - `\"John Doe <john@example.com>\"` -> `(\"john@example.com\", \"John Doe\")`\n - `\"john@example.com\"` -> `(\"john@example.com\", None)`" }, "extract_body": { "name": "extract_body", "kind": "function", "path": "mail_intake.ingestion.reader.extract_body", "signature": "", "docstring": "Extract the best-effort message body from a Gmail payload.\n\nPriority:\n\n1. `text/plain`\n2. `text/html` (stripped to text)\n3. Single-part body\n4. Empty string (if nothing usable found)\n\nArgs:\n payload (Dict[str, Any]):\n Provider-native message payload dictionary.\n\nReturns:\n str:\n Extracted plain-text message body." }, "normalize_subject": { "name": "normalize_subject", "kind": "function", "path": "mail_intake.ingestion.reader.normalize_subject", "signature": "", "docstring": "Normalize an email subject for thread-level comparison.\n\nArgs:\n subject (str):\n Raw subject line from a message header.\n\nReturns:\n str:\n Normalized subject string suitable for thread grouping.\n\nNotes:\n **Responsibilities:**\n\n - Strips common prefixes such as `Re:`, `Fwd:`, and `FW:`.\n - Repeats prefix stripping to handle stacked prefixes.\n - Collapses excessive whitespace.\n - Preserves original casing (no lowercasing).\n\n **Guarantees:**\n\n - This function is intentionally conservative and avoids aggressive\n transformations that could alter the semantic meaning of the subject." }, "MailIntakeParsingError": { "name": "MailIntakeParsingError", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeParsingError", "signature": "", "docstring": "Errors encountered while parsing message content.\n\nNotes:\n **Lifecycle:**\n\n - Raised when raw provider payloads cannot be interpreted or\n normalized into internal domain models." 
}, "MailIntakeReader": { "name": "MailIntakeReader", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeReader", "signature": "", "docstring": "High-level read-only ingestion interface.\n\nNotes:\n **Responsibilities:**\n\n - This class is the primary entry point for consumers of the\n Mail Intake library.\n - It orchestrates the full ingestion pipeline:\n - Querying the adapter for message references.\n - Fetching raw provider messages.\n - Parsing and normalizing message data.\n - Constructing domain models.\n\n **Constraints:**\n\n - This class is intentionally provider-agnostic, stateless beyond\n iteration scope, and read-only.", "members": { "iter_messages": { "name": "iter_messages", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeReader.iter_messages", "signature": "", "docstring": "Iterate over parsed messages matching a provider query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n MailIntakeMessage:\n Fully parsed and normalized `MailIntakeMessage` instances.\n\nRaises:\n MailIntakeParsingError:\n If a message cannot be parsed." }, "iter_threads": { "name": "iter_threads", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeReader.iter_threads", "signature": "", "docstring": "Iterate over threads constructed from messages matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n MailIntakeThread:\n Complete `MailIntakeThread` instances, one per `thread_id`.\n\nRaises:\n MailIntakeParsingError:\n If a message cannot be parsed.\n\nNotes:\n **Guarantees:**\n\n - Messages are grouped by `thread_id` and yielded as complete\n thread objects containing all associated messages." } } } } } }