{
|
|
"module": "mail_intake.ingestion",
|
|
"content": {
|
|
"path": "mail_intake.ingestion",
|
|
"docstring": "Mail ingestion orchestration for Mail Intake.\n\nThis package contains **high-level ingestion components** responsible for\ncoordinating mail retrieval, parsing, normalization, and model construction.\n\nIt represents the **top of the ingestion pipeline** and is intended to be the\nprimary interaction surface for library consumers.\n\nComponents in this package:\n- Are provider-agnostic\n- Depend only on adapter and parser contracts\n- Contain no provider-specific API logic\n- Expose read-only ingestion workflows\n\nConsumers are expected to construct a mail adapter and pass it to the\ningestion layer to begin processing messages and threads.",
|
|
"objects": {
|
|
"MailIntakeReader": {
|
|
"name": "MailIntakeReader",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.MailIntakeReader",
|
|
"signature": "<bound method Alias.signature of Alias('MailIntakeReader', 'mail_intake.ingestion.reader.MailIntakeReader')>",
|
|
"docstring": "High-level read-only ingestion interface.\n\nThis class is the **primary entry point** for consumers of the Mail\nIntake library.\n\nIt orchestrates the full ingestion pipeline:\n- Querying the adapter for message references\n- Fetching raw provider messages\n- Parsing and normalizing message data\n- Constructing domain models\n\nThis class is intentionally:\n- Provider-agnostic\n- Stateless beyond iteration scope\n- Read-only",
|
|
"members": {
|
|
"iter_messages": {
|
|
"name": "iter_messages",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.MailIntakeReader.iter_messages",
|
|
"signature": "<bound method Alias.signature of Alias('iter_messages', 'mail_intake.ingestion.reader.MailIntakeReader.iter_messages')>",
|
|
"docstring": "Iterate over parsed messages matching a provider query.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nYields:\n Fully parsed and normalized `MailIntakeMessage` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed."
|
|
},
|
|
"iter_threads": {
|
|
"name": "iter_threads",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.MailIntakeReader.iter_threads",
|
|
"signature": "<bound method Alias.signature of Alias('iter_threads', 'mail_intake.ingestion.reader.MailIntakeReader.iter_threads')>",
|
|
"docstring": "Iterate over threads constructed from messages matching a query.\n\nMessages are grouped by `thread_id` and yielded as complete thread\nobjects containing all associated messages.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nReturns:\n An iterator of `MailIntakeThread` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed."
|
|
}
|
|
}
|
|
},
|
|
"reader": {
|
|
"name": "reader",
|
|
"kind": "module",
|
|
"path": "mail_intake.ingestion.reader",
|
|
"signature": null,
|
|
"docstring": "High-level mail ingestion orchestration for Mail Intake.\n\nThis module provides the primary, provider-agnostic entry point for\nreading and processing mail data.\n\nIt coordinates:\n- Mail adapter access\n- Message and thread iteration\n- Header and body parsing\n- Normalization and model construction\n\nNo provider-specific logic or API semantics are permitted in this layer.",
|
|
"members": {
|
|
"datetime": {
|
|
"name": "datetime",
|
|
"kind": "alias",
|
|
"path": "mail_intake.ingestion.reader.datetime",
|
|
"signature": "<bound method Alias.signature of Alias('datetime', 'datetime.datetime')>",
|
|
"docstring": null
|
|
},
|
|
"Iterator": {
|
|
"name": "Iterator",
|
|
"kind": "alias",
|
|
"path": "mail_intake.ingestion.reader.Iterator",
|
|
"signature": "<bound method Alias.signature of Alias('Iterator', 'typing.Iterator')>",
|
|
"docstring": null
|
|
},
|
|
"Dict": {
|
|
"name": "Dict",
|
|
"kind": "alias",
|
|
"path": "mail_intake.ingestion.reader.Dict",
|
|
"signature": "<bound method Alias.signature of Alias('Dict', 'typing.Dict')>",
|
|
"docstring": null
|
|
},
|
|
"Any": {
|
|
"name": "Any",
|
|
"kind": "alias",
|
|
"path": "mail_intake.ingestion.reader.Any",
|
|
"signature": "<bound method Alias.signature of Alias('Any', 'typing.Any')>",
|
|
"docstring": null
|
|
},
|
|
"MailIntakeAdapter": {
|
|
"name": "MailIntakeAdapter",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeAdapter",
|
|
"signature": "<bound method Alias.signature of Alias('MailIntakeAdapter', 'mail_intake.adapters.base.MailIntakeAdapter')>",
|
|
"docstring": "Base adapter interface for mail providers.\n\nThis interface defines the minimal contract required to:\n- Discover messages matching a query\n- Retrieve full message payloads\n- Retrieve full thread payloads\n\nAdapters are intentionally read-only and must not mutate provider state.",
|
|
"members": {
|
|
"iter_message_refs": {
|
|
"name": "iter_message_refs",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeAdapter.iter_message_refs",
|
|
"signature": "<bound method Alias.signature of Alias('iter_message_refs', 'mail_intake.adapters.base.MailIntakeAdapter.iter_message_refs')>",
|
|
"docstring": "Iterate over lightweight message references matching a query.\n\nImplementations must yield dictionaries containing at least:\n- ``message_id``: Provider-specific message identifier\n- ``thread_id``: Provider-specific thread identifier\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nYields:\n Dictionaries containing message and thread identifiers.\n\nExample yield:\n {\n \"message_id\": \"...\",\n \"thread_id\": \"...\"\n }"
|
|
},
|
|
"fetch_message": {
|
|
"name": "fetch_message",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_message",
|
|
"signature": "<bound method Alias.signature of Alias('fetch_message', 'mail_intake.adapters.base.MailIntakeAdapter.fetch_message')>",
|
|
"docstring": "Fetch a full raw message by message identifier.\n\nArgs:\n message_id: Provider-specific message identifier.\n\nReturns:\n Provider-native message payload\n (e.g., Gmail message JSON structure)."
|
|
},
|
|
"fetch_thread": {
|
|
"name": "fetch_thread",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeAdapter.fetch_thread",
|
|
"signature": "<bound method Alias.signature of Alias('fetch_thread', 'mail_intake.adapters.base.MailIntakeAdapter.fetch_thread')>",
|
|
"docstring": "Fetch a full raw thread by thread identifier.\n\nArgs:\n thread_id: Provider-specific thread identifier.\n\nReturns:\n Provider-native thread payload."
|
|
}
|
|
}
|
|
},
|
|
"MailIntakeMessage": {
|
|
"name": "MailIntakeMessage",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage",
|
|
"signature": "<bound method Alias.signature of Alias('MailIntakeMessage', 'mail_intake.models.message.MailIntakeMessage')>",
|
|
"docstring": "Canonical internal representation of a single email message.\n\nThis model represents a fully parsed and normalized email message.\nIt is intentionally provider-agnostic and suitable for persistence,\nindexing, and downstream processing.\n\nApart from the `message_id` and `thread_id` fields, which carry\nprovider-specific identifiers, no provider-specific payloads or API\nsemantics should appear in this model.",
|
|
"members": {
|
|
"message_id": {
|
|
"name": "message_id",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.message_id",
|
|
"signature": "<bound method Alias.signature of Alias('message_id', 'mail_intake.models.message.MailIntakeMessage.message_id')>",
|
|
"docstring": "Provider-specific message identifier."
|
|
},
|
|
"thread_id": {
|
|
"name": "thread_id",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.thread_id",
|
|
"signature": "<bound method Alias.signature of Alias('thread_id', 'mail_intake.models.message.MailIntakeMessage.thread_id')>",
|
|
"docstring": "Provider-specific thread identifier to which this message belongs."
|
|
},
|
|
"timestamp": {
|
|
"name": "timestamp",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.timestamp",
|
|
"signature": "<bound method Alias.signature of Alias('timestamp', 'mail_intake.models.message.MailIntakeMessage.timestamp')>",
|
|
"docstring": "Message timestamp as a timezone-naive UTC datetime."
|
|
},
|
|
"from_email": {
|
|
"name": "from_email",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.from_email",
|
|
"signature": "<bound method Alias.signature of Alias('from_email', 'mail_intake.models.message.MailIntakeMessage.from_email')>",
|
|
"docstring": "Sender email address."
|
|
},
|
|
"from_name": {
|
|
"name": "from_name",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.from_name",
|
|
"signature": "<bound method Alias.signature of Alias('from_name', 'mail_intake.models.message.MailIntakeMessage.from_name')>",
|
|
"docstring": "Optional human-readable sender name."
|
|
},
|
|
"subject": {
|
|
"name": "subject",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.subject",
|
|
"signature": "<bound method Alias.signature of Alias('subject', 'mail_intake.models.message.MailIntakeMessage.subject')>",
|
|
"docstring": "Raw subject line of the message."
|
|
},
|
|
"body_text": {
|
|
"name": "body_text",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.body_text",
|
|
"signature": "<bound method Alias.signature of Alias('body_text', 'mail_intake.models.message.MailIntakeMessage.body_text')>",
|
|
"docstring": "Extracted plain-text body content of the message."
|
|
},
|
|
"snippet": {
|
|
"name": "snippet",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.snippet",
|
|
"signature": "<bound method Alias.signature of Alias('snippet', 'mail_intake.models.message.MailIntakeMessage.snippet')>",
|
|
"docstring": "Short provider-supplied preview snippet of the message."
|
|
},
|
|
"raw_headers": {
|
|
"name": "raw_headers",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeMessage.raw_headers",
|
|
"signature": "<bound method Alias.signature of Alias('raw_headers', 'mail_intake.models.message.MailIntakeMessage.raw_headers')>",
|
|
"docstring": "Normalized mapping of message headers (header name → value)."
|
|
}
|
|
}
|
|
},
|
|
"MailIntakeThread": {
|
|
"name": "MailIntakeThread",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread",
|
|
"signature": "<bound method Alias.signature of Alias('MailIntakeThread', 'mail_intake.models.thread.MailIntakeThread')>",
|
|
"docstring": "Canonical internal representation of an email thread.\n\nA thread groups multiple related messages under a single subject\nand participant set. It is designed to support reasoning over\nconversational context such as job applications, interviews,\nfollow-ups, and ongoing discussions.\n\nThis model is provider-agnostic and safe to persist.",
|
|
"members": {
|
|
"thread_id": {
|
|
"name": "thread_id",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.thread_id",
|
|
"signature": "<bound method Alias.signature of Alias('thread_id', 'mail_intake.models.thread.MailIntakeThread.thread_id')>",
|
|
"docstring": "Provider-specific thread identifier."
|
|
},
|
|
"normalized_subject": {
|
|
"name": "normalized_subject",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.normalized_subject",
|
|
"signature": "<bound method Alias.signature of Alias('normalized_subject', 'mail_intake.models.thread.MailIntakeThread.normalized_subject')>",
|
|
"docstring": "Normalized subject line used to group related messages."
|
|
},
|
|
"participants": {
|
|
"name": "participants",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.participants",
|
|
"signature": "<bound method Alias.signature of Alias('participants', 'mail_intake.models.thread.MailIntakeThread.participants')>",
|
|
"docstring": "Set of unique participant email addresses observed in the thread."
|
|
},
|
|
"messages": {
|
|
"name": "messages",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.messages",
|
|
"signature": "<bound method Alias.signature of Alias('messages', 'mail_intake.models.thread.MailIntakeThread.messages')>",
|
|
"docstring": "Ordered list of messages belonging to this thread."
|
|
},
|
|
"last_activity_at": {
|
|
"name": "last_activity_at",
|
|
"kind": "attribute",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.last_activity_at",
|
|
"signature": "<bound method Alias.signature of Alias('last_activity_at', 'mail_intake.models.thread.MailIntakeThread.last_activity_at')>",
|
|
"docstring": "Timestamp of the most recent message in the thread."
|
|
},
|
|
"add_message": {
|
|
"name": "add_message",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeThread.add_message",
|
|
"signature": "<bound method Alias.signature of Alias('add_message', 'mail_intake.models.thread.MailIntakeThread.add_message')>",
|
|
"docstring": "Add a message to the thread and update derived fields.\n\nThis method:\n- Appends the message to the thread\n- Tracks unique participants\n- Updates the last activity timestamp\n\nArgs:\n message: Parsed mail message to add to the thread."
|
|
}
|
|
}
|
|
},
|
|
"parse_headers": {
|
|
"name": "parse_headers",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.parse_headers",
|
|
"signature": "<bound method Alias.signature of Alias('parse_headers', 'mail_intake.parsers.headers.parse_headers')>",
|
|
"docstring": "Convert a list of Gmail-style headers into a normalized dict.\n\nProvider payloads (such as Gmail) typically represent headers as a list\nof name/value mappings. This function normalizes them into a\ndictionary keyed by lowercase header names, which removes the\ncase variation of the original header names.\n\nArgs:\n raw_headers: List of header dictionaries, each containing\n ``name`` and ``value`` keys.\n\nReturns:\n Dictionary mapping lowercase header names to stripped values.\n\nExample:\n Input:\n [\n {\"name\": \"From\", \"value\": \"John Doe <john@example.com>\"},\n {\"name\": \"Subject\", \"value\": \"Re: Interview Update\"},\n ]\n\n Output:\n {\n \"from\": \"John Doe <john@example.com>\",\n \"subject\": \"Re: Interview Update\",\n }"
|
|
},
|
|
"extract_sender": {
|
|
"name": "extract_sender",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.extract_sender",
|
|
"signature": "<bound method Alias.signature of Alias('extract_sender', 'mail_intake.parsers.headers.extract_sender')>",
|
|
"docstring": "Extract sender email and optional display name from headers.\n\nThis function parses the ``From`` header and attempts to extract:\n- Sender email address\n- Optional human-readable display name\n\nArgs:\n headers: Normalized header dictionary as returned by\n :func:`parse_headers`.\n\nReturns:\n A tuple ``(email, name)`` where:\n - ``email`` is the sender email address\n - ``name`` is the display name, or ``None`` if unavailable\n\nExamples:\n ``\"John Doe <john@example.com>\"`` → ``(\"john@example.com\", \"John Doe\")``\n ``\"john@example.com\"`` → ``(\"john@example.com\", None)``"
|
|
},
|
|
"extract_body": {
|
|
"name": "extract_body",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.extract_body",
|
|
"signature": "<bound method Alias.signature of Alias('extract_body', 'mail_intake.parsers.body.extract_body')>",
|
|
"docstring": "Extract the best-effort message body from a Gmail payload.\n\nPriority:\n1. text/plain\n2. text/html (stripped to text)\n3. Single-part body\n4. empty string (if nothing usable found)\n\nArgs:\n payload: Provider-native message payload dictionary.\n\nReturns:\n Extracted plain-text message body."
|
|
},
|
|
"normalize_subject": {
|
|
"name": "normalize_subject",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.normalize_subject",
|
|
"signature": "<bound method Alias.signature of Alias('normalize_subject', 'mail_intake.parsers.subject.normalize_subject')>",
|
|
"docstring": "Normalize an email subject for thread-level comparison.\n\nOperations:\n- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``\n- Repeats prefix stripping to handle stacked prefixes\n- Collapses excessive whitespace\n- Preserves original casing (no lowercasing)\n\nThis function is intentionally conservative and avoids aggressive\ntransformations that could alter the semantic meaning of the subject.\n\nArgs:\n subject: Raw subject line from a message header.\n\nReturns:\n Normalized subject string suitable for thread grouping."
|
|
},
|
|
"MailIntakeParsingError": {
|
|
"name": "MailIntakeParsingError",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeParsingError",
|
|
"signature": "<bound method Alias.signature of Alias('MailIntakeParsingError', 'mail_intake.exceptions.MailIntakeParsingError')>",
|
|
"docstring": "Errors encountered while parsing message content.\n\nRaised when raw provider payloads cannot be interpreted\nor normalized into internal domain models."
|
|
},
|
|
"MailIntakeReader": {
|
|
"name": "MailIntakeReader",
|
|
"kind": "class",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeReader",
|
|
"signature": "<bound method Class.signature of Class('MailIntakeReader', 28, 155)>",
|
|
"docstring": "High-level read-only ingestion interface.\n\nThis class is the **primary entry point** for consumers of the Mail\nIntake library.\n\nIt orchestrates the full ingestion pipeline:\n- Querying the adapter for message references\n- Fetching raw provider messages\n- Parsing and normalizing message data\n- Constructing domain models\n\nThis class is intentionally:\n- Provider-agnostic\n- Stateless beyond iteration scope\n- Read-only",
|
|
"members": {
|
|
"iter_messages": {
|
|
"name": "iter_messages",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeReader.iter_messages",
|
|
"signature": "<bound method Function.signature of Function('iter_messages', 57, 72)>",
|
|
"docstring": "Iterate over parsed messages matching a provider query.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nYields:\n Fully parsed and normalized `MailIntakeMessage` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed."
|
|
},
|
|
"iter_threads": {
|
|
"name": "iter_threads",
|
|
"kind": "function",
|
|
"path": "mail_intake.ingestion.reader.MailIntakeReader.iter_threads",
|
|
"signature": "<bound method Function.signature of Function('iter_threads', 74, 106)>",
|
|
"docstring": "Iterate over threads constructed from messages matching a query.\n\nMessages are grouped by `thread_id` and yielded as complete thread\nobjects containing all associated messages.\n\nArgs:\n query: Provider-specific query string used to filter messages.\n\nReturns:\n An iterator of `MailIntakeThread` instances.\n\nRaises:\n MailIntakeParsingError: If a message cannot be parsed."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} |