docs(mail_intake): add comprehensive docstrings across ingestion, adapters, auth, and parsing layers
- docs(mail_intake/__init__.py): document module-based public API and usage patterns
- docs(mail_intake/ingestion/reader.py): document high-level ingestion orchestration
- docs(mail_intake/adapters/base.py): document adapter contract for mail providers
- docs(mail_intake/adapters/gmail.py): document Gmail adapter implementation and constraints
- docs(mail_intake/auth/base.py): document authentication provider contract
- docs(mail_intake/auth/google.py): document Google OAuth authentication provider
- docs(mail_intake/models/message.py): document canonical email message model
- docs(mail_intake/models/thread.py): document canonical email thread model
- docs(mail_intake/parsers/body.py): document message body extraction logic
- docs(mail_intake/parsers/headers.py): document message header normalization utilities
- docs(mail_intake/parsers/subject.py): document subject normalization utilities
- docs(mail_intake/config.py): document global configuration model
- docs(mail_intake/exceptions.py): document library exception hierarchy
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Domain models for Mail Intake.
|
||||
|
||||
This package defines the **canonical, provider-agnostic data models**
|
||||
used throughout the Mail Intake ingestion pipeline.
|
||||
|
||||
Models in this package:
|
||||
- Represent fully parsed and normalized mail data
|
||||
- Are safe to persist, serialize, and index
|
||||
- Contain no provider-specific payloads or API semantics
|
||||
- Serve as stable inputs for downstream processing and analysis
|
||||
|
||||
These models form the core internal data contract of the library.
|
||||
"""
|
||||
|
||||
from .message import MailIntakeMessage
|
||||
from .thread import MailIntakeThread
|
||||
|
||||
__all__ = [
|
||||
"MailIntakeMessage",
|
||||
"MailIntakeThread",
|
||||
]
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
"""
|
||||
Message domain models for Mail Intake.
|
||||
|
||||
This module defines the **canonical, provider-agnostic representation**
|
||||
of an individual email message as used internally by the Mail Intake
|
||||
ingestion pipeline.
|
||||
|
||||
Models in this module are safe to persist and must not contain any
|
||||
provider-specific fields or semantics.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict
|
||||
@@ -8,19 +19,37 @@ class MailIntakeMessage:
|
||||
"""
|
||||
Canonical internal representation of a single email message.
|
||||
|
||||
This model is provider-agnostic and safe to persist.
|
||||
No Gmail-specific fields should appear here.
|
||||
This model represents a fully parsed and normalized email message.
|
||||
It is intentionally provider-agnostic and suitable for persistence,
|
||||
indexing, and downstream processing.
|
||||
|
||||
Apart from the opaque message and thread IDs, no provider-specific payloads or API semantics
|
||||
should appear in this model.
|
||||
"""
|
||||
|
||||
message_id: str
|
||||
"""Provider-specific message identifier."""
|
||||
|
||||
thread_id: str
|
||||
"""Provider-specific thread identifier to which this message belongs."""
|
||||
|
||||
timestamp: datetime
|
||||
"""Message timestamp as a timezone-naive UTC datetime."""
|
||||
|
||||
from_email: str
|
||||
"""Sender email address."""
|
||||
|
||||
from_name: Optional[str]
|
||||
"""Optional human-readable sender name."""
|
||||
|
||||
subject: str
|
||||
"""Raw subject line of the message."""
|
||||
|
||||
body_text: str
|
||||
"""Extracted plain-text body content of the message."""
|
||||
|
||||
snippet: str
|
||||
"""Short provider-supplied preview snippet of the message."""
|
||||
|
||||
raw_headers: Dict[str, str]
|
||||
"""Normalized mapping of message headers (header name → value)."""
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
"""
|
||||
Thread domain models for Mail Intake.
|
||||
|
||||
This module defines the **canonical, provider-agnostic representation**
|
||||
of an email thread as used internally by the Mail Intake ingestion pipeline.
|
||||
|
||||
Threads group related messages and serve as the primary unit of reasoning
|
||||
for higher-level correspondence workflows.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import List, Set
|
||||
@@ -10,21 +20,40 @@ class MailIntakeThread:
|
||||
"""
|
||||
Canonical internal representation of an email thread.
|
||||
|
||||
Threads are the primary unit of reasoning for correspondence
|
||||
workflows (job applications, interviews, follow-ups, etc.).
|
||||
A thread groups multiple related messages under a single subject
|
||||
and participant set. It is designed to support reasoning over
|
||||
conversational context such as job applications, interviews,
|
||||
follow-ups, and ongoing discussions.
|
||||
|
||||
This model is provider-agnostic and safe to persist.
|
||||
"""
|
||||
|
||||
thread_id: str
|
||||
"""Provider-specific thread identifier."""
|
||||
|
||||
normalized_subject: str
|
||||
"""Normalized subject line used to group related messages."""
|
||||
|
||||
participants: Set[str] = field(default_factory=set)
|
||||
"""Set of unique participant email addresses observed in the thread."""
|
||||
|
||||
messages: List[MailIntakeMessage] = field(default_factory=list)
|
||||
"""Ordered list of messages belonging to this thread."""
|
||||
|
||||
last_activity_at: datetime | None = None
|
||||
"""Timestamp of the most recent message in the thread."""
|
||||
|
||||
def add_message(self, message: MailIntakeMessage) -> None:
|
||||
"""
|
||||
Add a message to the thread and update derived fields.
|
||||
|
||||
This method:
|
||||
- Appends the message to the thread
|
||||
- Tracks unique participants
|
||||
- Updates the last activity timestamp
|
||||
|
||||
Args:
|
||||
message: Parsed mail message to add to the thread.
|
||||
"""
|
||||
self.messages.append(message)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user