Updated docstrings and added README.md

This commit is contained in:
2026-03-08 17:59:53 +05:30
parent 0453fdd88a
commit c541577788
46 changed files with 863 additions and 681 deletions

View File

@@ -2,7 +2,7 @@
"module": "mail_intake.ingestion.reader",
"content": {
"path": "mail_intake.ingestion.reader",
"docstring": "High-level mail ingestion orchestration for Mail Intake.\n\n---\n\n## Summary\n\nThis module provides the primary, provider-agnostic entry point for\nreading and processing mail data.\n\nIt coordinates:\n- Mail adapter access\n- Message and thread iteration\n- Header and body parsing\n- Normalization and model construction\n\nNo provider-specific logic or API semantics are permitted in this layer.",
"docstring": "# Summary\n\nHigh-level mail ingestion orchestration for Mail Intake.\n\nThis module provides the primary, provider-agnostic entry point for\nreading and processing mail data.\n\nIt coordinates:\n\n- Mail adapter access.\n- Message and thread iteration.\n- Header and body parsing.\n- Normalization and model construction.\n\nNo provider-specific logic or API semantics are permitted in this layer.",
"objects": {
"datetime": {
"name": "datetime",
@@ -37,14 +37,14 @@
"kind": "class",
"path": "mail_intake.ingestion.reader.MailIntakeAdapter",
"signature": "<bound method Alias.signature of Alias('MailIntakeAdapter', 'mail_intake.adapters.base.MailIntakeAdapter')>",
"docstring": "Base adapter interface for mail providers.\n\nNotes:\n **Guarantees:**\n\n - discover messages matching a query\n - retrieve full message payloads\n - retrieve full thread payloads\n\n **Lifecycle:**\n\n - adapters are intentionally read-only and must not mutate provider state",
"docstring": "Base adapter interface for mail providers.\n\nNotes:\n **Guarantees:**\n\n - Discover messages matching a query.\n - Retrieve full message payloads.\n - Retrieve full thread payloads.\n\n **Lifecycle:**\n\n - Adapters are intentionally read-only and must not mutate provider state.",
"members": {
"iter_message_refs": {
"name": "iter_message_refs",
"kind": "function",
"path": "mail_intake.ingestion.reader.MailIntakeAdapter.iter_message_refs",
"signature": "<bound method Alias.signature of Alias('iter_message_refs', 'mail_intake.adapters.base.MailIntakeAdapter.iter_message_refs')>",
"docstring": "Iterate over lightweight message references matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n Dict[str, str]:\n Dictionaries containing message and thread identifiers.\n\nNotes:\n **Guarantees:**\n\n - Implementations must yield dictionaries containing at least ``message_id`` and ``thread_id``\n\nExample:\n Typical yield:\n\n {\n \"message_id\": \"...\",\n \"thread_id\": \"...\"\n }"
"docstring": "Iterate over lightweight message references matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n Dict[str, str]:\n Dictionaries containing message and thread identifiers.\n\nNotes:\n **Guarantees:**\n\n - Implementations must yield dictionaries containing at least\n `message_id` and `thread_id`.\n\nExample:\n Typical yield:\n\n ```python\n {\n \"message_id\": \"...\",\n \"thread_id\": \"...\"\n }\n ```"
},
"fetch_message": {
"name": "fetch_message",
@@ -67,7 +67,7 @@
"kind": "class",
"path": "mail_intake.ingestion.reader.MailIntakeMessage",
"signature": "<bound method Alias.signature of Alias('MailIntakeMessage', 'mail_intake.models.message.MailIntakeMessage')>",
"docstring": "Canonical internal representation of a single email message.\n\nNotes:\n **Guarantees:**\n\n - This model represents a fully parsed and normalized email message\n - It is intentionally provider-agnostic and suitable for persistence, indexing, and downstream processing\n\n **Constraints:**\n \n - No provider-specific identifiers, payloads, or API semantics should appear in this model",
"docstring": "Canonical internal representation of a single email message.\n\nNotes:\n **Guarantees:**\n\n - This model represents a fully parsed and normalized email message.\n - It is intentionally provider-agnostic and suitable for\n persistence, indexing, and downstream processing.\n\n **Constraints:**\n\n - No provider-specific identifiers, payloads, or API semantics\n should appear in this model.",
"members": {
"message_id": {
"name": "message_id",
@@ -139,7 +139,7 @@
"kind": "class",
"path": "mail_intake.ingestion.reader.MailIntakeThread",
"signature": "<bound method Alias.signature of Alias('MailIntakeThread', 'mail_intake.models.thread.MailIntakeThread')>",
"docstring": "Canonical internal representation of an email thread.\n\nNotes:\n **Guarantees:**\n\n - A thread groups multiple related messages under a single subject and participant set\n - It is designed to support reasoning over conversational context such as job applications, interviews, follow-ups, and ongoing discussions\n - This model is provider-agnostic and safe to persist",
"docstring": "Canonical internal representation of an email thread.\n\nNotes:\n **Guarantees:**\n\n - A thread groups multiple related messages under a single subject\n and participant set.\n - It is designed to support reasoning over conversational context\n such as job applications, interviews, follow-ups, and ongoing discussions.\n - This model is provider-agnostic and safe to persist.",
"members": {
"thread_id": {
"name": "thread_id",
@@ -181,7 +181,7 @@
"kind": "function",
"path": "mail_intake.ingestion.reader.MailIntakeThread.add_message",
"signature": "<bound method Alias.signature of Alias('add_message', 'mail_intake.models.thread.MailIntakeThread.add_message')>",
"docstring": "Add a message to the thread and update derived fields.\n\nArgs:\n message (MailIntakeMessage):\n Parsed mail message to add to the thread.\n\nNotes:\n **Responsibilities:**\n\n - Appends the message to the thread\n - Tracks unique participants\n - Updates the last activity timestamp"
"docstring": "Add a message to the thread and update derived fields.\n\nArgs:\n message (MailIntakeMessage):\n Parsed mail message to add to the thread.\n\nNotes:\n **Responsibilities:**\n\n - Appends the message to the thread.\n - Tracks unique participants.\n - Updates the last activity timestamp."
}
}
},
@@ -190,56 +190,56 @@
"kind": "function",
"path": "mail_intake.ingestion.reader.parse_headers",
"signature": "<bound method Alias.signature of Alias('parse_headers', 'mail_intake.parsers.headers.parse_headers')>",
"docstring": "Convert a list of Gmail-style headers into a normalized dict.\n\nArgs:\n raw_headers (List[Dict[str, str]]):\n List of header dictionaries, each containing ``name`` and ``value`` keys.\n\nReturns:\n Dict[str, str]:\n Dictionary mapping lowercase header names to stripped values.\n\nNotes:\n **Guarantees:**\n\n - Provider payloads (such as Gmail) typically represent headers as a list of name/value mappings\n - This function normalizes them into a case-insensitive dictionary keyed by lowercase header names\n\nExample:\n Typical usage:\n \n Input:\n [\n {\"name\": \"From\", \"value\": \"John Doe <john@example.com>\"},\n {\"name\": \"Subject\", \"value\": \"Re: Interview Update\"},\n ]\n\n Output:\n {\n \"from\": \"John Doe <john@example.com>\",\n \"subject\": \"Re: Interview Update\",\n }"
"docstring": "Convert a list of Gmail-style headers into a normalized dict.\n\nArgs:\n raw_headers (List[Dict[str, str]]):\n List of header dictionaries, each containing `name` and `value` keys.\n\nReturns:\n Dict[str, str]:\n Dictionary mapping lowercase header names to stripped values.\n\nNotes:\n **Guarantees:**\n\n - Provider payloads (such as Gmail) typically represent headers as a\n list of name/value mappings.\n - This function normalizes them into a case-insensitive dictionary\n keyed by lowercase header names.\n\nExample:\n Typical usage:\n\n ```python\n Input:\n [\n {\"name\": \"From\", \"value\": \"John Doe <john@example.com>\"},\n {\"name\": \"Subject\", \"value\": \"Re: Interview Update\"},\n ]\n\n Output:\n {\n \"from\": \"John Doe <john@example.com>\",\n \"subject\": \"Re: Interview Update\",\n }\n ```"
},
"extract_sender": {
"name": "extract_sender",
"kind": "function",
"path": "mail_intake.ingestion.reader.extract_sender",
"signature": "<bound method Alias.signature of Alias('extract_sender', 'mail_intake.parsers.headers.extract_sender')>",
"docstring": "Extract sender email and optional display name from headers.\n\nArgs:\n headers (Dict[str, str]):\n Normalized header dictionary as returned by :func:`parse_headers`.\n\nReturns:\n Tuple[str, Optional[str]]:\n A tuple ``(email, name)`` where ``email`` is the sender email address and ``name`` is the display name, or ``None`` if unavailable.\n\nNotes:\n **Responsibilities:**\n\n - This function parses the ``From`` header and attempts to extract sender email address and optional human-readable display name\n\nExample:\n Typical values:\n\n ``\"John Doe <john@example.com>\"`` -> ``(\"john@example.com\", \"John Doe\")``\n ``\"john@example.com\"`` -> ``(\"john@example.com\", None)``"
"docstring": "Extract sender email and optional display name from headers.\n\nArgs:\n headers (Dict[str, str]):\n Normalized header dictionary as returned by `parse_headers()`.\n\nReturns:\n Tuple[str, Optional[str]]:\n A tuple `(email, name)` where `email` is the sender email address\n and `name` is the display name, or `None` if unavailable.\n\nNotes:\n **Responsibilities:**\n\n - This function parses the `From` header and attempts to extract\n sender email address and optional human-readable display name.\n\nExample:\n Typical values:\n\n - `\"John Doe <john@example.com>\"` -> `(\"john@example.com\", \"John Doe\")`\n - `\"john@example.com\"` -> `(\"john@example.com\", None)`"
},
"extract_body": {
"name": "extract_body",
"kind": "function",
"path": "mail_intake.ingestion.reader.extract_body",
"signature": "<bound method Alias.signature of Alias('extract_body', 'mail_intake.parsers.body.extract_body')>",
"docstring": "Extract the best-effort message body from a Gmail payload.\n\nPriority:\n1. text/plain\n2. text/html (stripped to text)\n3. Single-part body\n4. empty string (if nothing usable found)\n\nArgs:\n payload: Provider-native message payload dictionary.\n\nReturns:\n Extracted plain-text message body."
"docstring": "Extract the best-effort message body from a Gmail payload.\n\nPriority:\n\n1. `text/plain`\n2. `text/html` (stripped to text)\n3. Single-part body\n4. Empty string (if nothing usable found)\n\nArgs:\n payload (Dict[str, Any]):\n Provider-native message payload dictionary.\n\nReturns:\n str:\n Extracted plain-text message body."
},
"normalize_subject": {
"name": "normalize_subject",
"kind": "function",
"path": "mail_intake.ingestion.reader.normalize_subject",
"signature": "<bound method Alias.signature of Alias('normalize_subject', 'mail_intake.parsers.subject.normalize_subject')>",
"docstring": "Normalize an email subject for thread-level comparison.\n\nArgs:\n subject (str):\n Raw subject line from a message header.\n\nReturns:\n str:\n Normalized subject string suitable for thread grouping.\n\nNotes:\n **Responsibilities:**\n\n - Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``\n - Repeats prefix stripping to handle stacked prefixes\n - Collapses excessive whitespace\n - Preserves original casing (no lowercasing)\n\n **Guarantees:**\n\n - This function is intentionally conservative and avoids aggressive transformations that could alter the semantic meaning of the subject"
"docstring": "Normalize an email subject for thread-level comparison.\n\nArgs:\n subject (str):\n Raw subject line from a message header.\n\nReturns:\n str:\n Normalized subject string suitable for thread grouping.\n\nNotes:\n **Responsibilities:**\n\n - Strips common prefixes such as `Re:`, `Fwd:`, and `FW:`.\n - Repeats prefix stripping to handle stacked prefixes.\n - Collapses excessive whitespace.\n - Preserves original casing (no lowercasing).\n\n **Guarantees:**\n\n - This function is intentionally conservative and avoids aggressive\n transformations that could alter the semantic meaning of the subject."
},
"MailIntakeParsingError": {
"name": "MailIntakeParsingError",
"kind": "class",
"path": "mail_intake.ingestion.reader.MailIntakeParsingError",
"signature": "<bound method Alias.signature of Alias('MailIntakeParsingError', 'mail_intake.exceptions.MailIntakeParsingError')>",
"docstring": "Errors encountered while parsing message content.\n\nNotes:\n **Lifecycle:**\n\n - Raised when raw provider payloads cannot be interpreted or normalized into internal domain models"
"docstring": "Errors encountered while parsing message content.\n\nNotes:\n **Lifecycle:**\n\n - Raised when raw provider payloads cannot be interpreted or\n normalized into internal domain models."
},
"MailIntakeReader": {
"name": "MailIntakeReader",
"kind": "class",
"path": "mail_intake.ingestion.reader.MailIntakeReader",
"signature": "<bound method Class.signature of Class('MailIntakeReader', 32, 165)>",
"docstring": "High-level read-only ingestion interface.\n\nNotes:\n **Responsibilities:**\n\n - This class is the primary entry point for consumers of the Mail Intake library\n - It orchestrates the full ingestion pipeline: Querying the adapter for message references, fetching raw provider messages, parsing and normalizing message data, constructing domain models\n\n **Constraints:**\n \n - This class is intentionally: Provider-agnostic, stateless beyond iteration scope, read-only",
"signature": "<bound method Class.signature of Class('MailIntakeReader', 31, 171)>",
"docstring": "High-level read-only ingestion interface.\n\nNotes:\n **Responsibilities:**\n\n - This class is the primary entry point for consumers of the\n Mail Intake library.\n - It orchestrates the full ingestion pipeline:\n - Querying the adapter for message references.\n - Fetching raw provider messages.\n - Parsing and normalizing message data.\n - Constructing domain models.\n\n **Constraints:**\n\n - This class is intentionally: Provider-agnostic, stateless beyond\n iteration scope, read-only.",
"members": {
"iter_messages": {
"name": "iter_messages",
"kind": "function",
"path": "mail_intake.ingestion.reader.MailIntakeReader.iter_messages",
"signature": "<bound method Function.signature of Function('iter_messages', 57, 75)>",
"signature": "<bound method Function.signature of Function('iter_messages', 62, 80)>",
"docstring": "Iterate over parsed messages matching a provider query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n MailIntakeMessage:\n Fully parsed and normalized `MailIntakeMessage` instances.\n\nRaises:\n MailIntakeParsingError:\n If a message cannot be parsed."
},
"iter_threads": {
"name": "iter_threads",
"kind": "function",
"path": "mail_intake.ingestion.reader.MailIntakeReader.iter_threads",
"signature": "<bound method Function.signature of Function('iter_threads', 77, 114)>",
"docstring": "Iterate over threads constructed from messages matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n MailIntakeThread:\n An iterator of `MailIntakeThread` instances.\n\nRaises:\n MailIntakeParsingError:\n If a message cannot be parsed.\n\nNotes:\n **Guarantees:**\n\n - Messages are grouped by `thread_id` and yielded as complete thread objects containing all associated messages"
"signature": "<bound method Function.signature of Function('iter_threads', 82, 120)>",
"docstring": "Iterate over threads constructed from messages matching a query.\n\nArgs:\n query (str):\n Provider-specific query string used to filter messages.\n\nYields:\n MailIntakeThread:\n An iterator of `MailIntakeThread` instances.\n\nRaises:\n `MailIntakeParsingError`:\n If a message cannot be parsed.\n\nNotes:\n **Guarantees:**\n\n - Messages are grouped by `thread_id` and yielded as complete\n thread objects containing all associated messages."
}
}
}