{ "module": "mail_intake.ingestion.reader", "content": { "path": "mail_intake.ingestion.reader", "docstring": "High-level mail ingestion orchestration for Mail Intake.\n\nThis module provides the primary, provider-agnostic entry point for\nreading and processing mail data.\n\nIt coordinates:\n- Mail adapter access\n- Message and thread iteration\n- Header and body parsing\n- Normalization and model construction\n\nNo provider-specific logic or API semantics are permitted in this layer.", "objects": { "datetime": { "name": "datetime", "kind": "alias", "path": "mail_intake.ingestion.reader.datetime", "signature": "", "docstring": null }, "Iterator": { "name": "Iterator", "kind": "alias", "path": "mail_intake.ingestion.reader.Iterator", "signature": "", "docstring": null }, "Dict": { "name": "Dict", "kind": "alias", "path": "mail_intake.ingestion.reader.Dict", "signature": "", "docstring": null }, "Any": { "name": "Any", "kind": "alias", "path": "mail_intake.ingestion.reader.Any", "signature": "", "docstring": null }, "MailIntakeAdapter": { "name": "MailIntakeAdapter", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeAdapter", "signature": "", "docstring": "Base adapter interface for mail providers.\n\nThis interface defines the minimal contract required to:\n- Discover messages matching a query\n- Retrieve full message payloads\n- Retrieve full thread payloads\n\nAdapters are intentionally read-only and must not mutate provider state.", "members": { "iter_message_refs": { "name": "iter_message_refs", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.iter_message_refs", "signature": "", "docstring": "Iterate over lightweight message references matching a query.\n\nImplementations must yield dictionaries containing at least:\n- ``message_id``: Provider-specific message identifier\n- ``thread_id``: Provider-specific thread identifier\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nYields:\n 
Dictionaries containing message and thread identifiers.\n\nExample yield:\n {\n \"message_id\": \"...\",\n \"thread_id\": \"...\"\n }" }, "fetch_message": { "name": "fetch_message", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_message", "signature": "", "docstring": "Fetch a full raw message by message identifier.\n\nArgs:\n message_id: Provider-specific message identifier.\n\nReturns:\n Provider-native message payload\n (e.g., Gmail message JSON structure)." }, "fetch_thread": { "name": "fetch_thread", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_thread", "signature": "", "docstring": "Fetch a full raw thread by thread identifier.\n\nArgs:\n thread_id: Provider-specific thread identifier.\n\nReturns:\n Provider-native thread payload." } } }, "MailIntakeMessage": { "name": "MailIntakeMessage", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeMessage", "signature": "", "docstring": "Canonical internal representation of a single email message.\n\nThis model represents a fully parsed and normalized email message.\nIt is intentionally provider-agnostic and suitable for persistence,\nindexing, and downstream processing.\n\nNo provider-specific identifiers, payloads, or API semantics\nshould appear in this model.", "members": { "message_id": { "name": "message_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.message_id", "signature": "", "docstring": "Provider-specific message identifier." }, "thread_id": { "name": "thread_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.thread_id", "signature": "", "docstring": "Provider-specific thread identifier to which this message belongs." }, "timestamp": { "name": "timestamp", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.timestamp", "signature": "", "docstring": "Message timestamp as a timezone-naive UTC datetime." 
}, "from_email": { "name": "from_email", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.from_email", "signature": "", "docstring": "Sender email address." }, "from_name": { "name": "from_name", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.from_name", "signature": "", "docstring": "Optional human-readable sender name." }, "subject": { "name": "subject", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.subject", "signature": "", "docstring": "Raw subject line of the message." }, "body_text": { "name": "body_text", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.body_text", "signature": "", "docstring": "Extracted plain-text body content of the message." }, "snippet": { "name": "snippet", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.snippet", "signature": "", "docstring": "Short provider-supplied preview snippet of the message." }, "raw_headers": { "name": "raw_headers", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeMessage.raw_headers", "signature": "", "docstring": "Normalized mapping of message headers (header name → value)." } } }, "MailIntakeThread": { "name": "MailIntakeThread", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeThread", "signature": "", "docstring": "Canonical internal representation of an email thread.\n\nA thread groups multiple related messages under a single subject\nand participant set. It is designed to support reasoning over\nconversational context such as job applications, interviews,\nfollow-ups, and ongoing discussions.\n\nThis model is provider-agnostic and safe to persist.", "members": { "thread_id": { "name": "thread_id", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.thread_id", "signature": "", "docstring": "Provider-specific thread identifier." 
}, "normalized_subject": { "name": "normalized_subject", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.normalized_subject", "signature": "", "docstring": "Normalized subject line used to group related messages." }, "participants": { "name": "participants", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.participants", "signature": "", "docstring": "Set of unique participant email addresses observed in the thread." }, "messages": { "name": "messages", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.messages", "signature": "", "docstring": "Ordered list of messages belonging to this thread." }, "last_activity_at": { "name": "last_activity_at", "kind": "attribute", "path": "mail_intake.ingestion.reader.MailIntakeThread.last_activity_at", "signature": "", "docstring": "Timestamp of the most recent message in the thread." }, "add_message": { "name": "add_message", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeThread.add_message", "signature": "", "docstring": "Add a message to the thread and update derived fields.\n\nThis method:\n- Appends the message to the thread\n- Tracks unique participants\n- Updates the last activity timestamp\n\nArgs:\n message: Parsed mail message to add to the thread." } } }, "parse_headers": { "name": "parse_headers", "kind": "function", "path": "mail_intake.ingestion.reader.parse_headers", "signature": "", "docstring": "Convert a list of Gmail-style headers into a normalized dict.\n\nProvider payloads (such as Gmail) typically represent headers as a list\nof name/value mappings. 
This function normalizes them into a\ncase-insensitive dictionary keyed by lowercase header names.\n\nArgs:\n raw_headers: List of header dictionaries, each containing\n ``name`` and ``value`` keys.\n\nReturns:\n Dictionary mapping lowercase header names to stripped values.\n\nExample:\n Input:\n [\n {\"name\": \"From\", \"value\": \"John Doe <john@example.com>\"},\n {\"name\": \"Subject\", \"value\": \"Re: Interview Update\"},\n ]\n\n Output:\n {\n \"from\": \"John Doe <john@example.com>\",\n \"subject\": \"Re: Interview Update\",\n }" }, "extract_sender": { "name": "extract_sender", "kind": "function", "path": "mail_intake.ingestion.reader.extract_sender", "signature": "", "docstring": "Extract sender email and optional display name from headers.\n\nThis function parses the ``From`` header and attempts to extract:\n- Sender email address\n- Optional human-readable display name\n\nArgs:\n headers: Normalized header dictionary as returned by\n :func:`parse_headers`.\n\nReturns:\n A tuple ``(email, name)`` where:\n - ``email`` is the sender email address\n - ``name`` is the display name, or ``None`` if unavailable\n\nExamples:\n ``\"John Doe <john@example.com>\"`` → ``(\"john@example.com\", \"John Doe\")``\n ``\"john@example.com\"`` → ``(\"john@example.com\", None)``" }, "extract_body": { "name": "extract_body", "kind": "function", "path": "mail_intake.ingestion.reader.extract_body", "signature": "", "docstring": "Extract the best-effort message body from a Gmail payload.\n\nPriority:\n1. text/plain\n2. text/html (stripped to text)\n3. Single-part body\n4. empty string (if nothing usable found)\n\nArgs:\n payload: Provider-native message payload dictionary.\n\nReturns:\n Extracted plain-text message body."
}, "normalize_subject": { "name": "normalize_subject", "kind": "function", "path": "mail_intake.ingestion.reader.normalize_subject", "signature": "", "docstring": "Normalize an email subject for thread-level comparison.\n\nOperations:\n- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``\n- Repeats prefix stripping to handle stacked prefixes\n- Collapses excessive whitespace\n- Preserves original casing (no lowercasing)\n\nThis function is intentionally conservative and avoids aggressive\ntransformations that could alter the semantic meaning of the subject.\n\nArgs:\n subject: Raw subject line from a message header.\n\nReturns:\n Normalized subject string suitable for thread grouping." }, "MailIntakeParsingError": { "name": "MailIntakeParsingError", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeParsingError", "signature": "", "docstring": "Errors encountered while parsing message content.\n\nRaised when raw provider payloads cannot be interpreted\nor normalized into internal domain models." 
}, "MailIntakeReader": { "name": "MailIntakeReader", "kind": "class", "path": "mail_intake.ingestion.reader.MailIntakeReader", "signature": "", "docstring": "High-level read-only ingestion interface.\n\nThis class is the **primary entry point** for consumers of the Mail\nIntake library.\n\nIt orchestrates the full ingestion pipeline:\n- Querying the adapter for message references\n- Fetching raw provider messages\n- Parsing and normalizing message data\n- Constructing domain models\n\nThis class is intentionally:\n- Provider-agnostic\n- Stateless beyond iteration scope\n- Read-only", "members": { "iter_messages": { "name": "iter_messages", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeReader.iter_messages", "signature": "", "docstring": "Iterate over parsed messages matching a provider query.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nYields:\n Fully parsed and normalized `MailIntakeMessage` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed." }, "iter_threads": { "name": "iter_threads", "kind": "function", "path": "mail_intake.ingestion.reader.MailIntakeReader.iter_threads", "signature": "", "docstring": "Iterate over threads constructed from messages matching a query.\n\nMessages are grouped by `thread_id` and yielded as complete thread\nobjects containing all associated messages.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nReturns:\n An iterator of `MailIntakeThread` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed." } } } } } }