Compare commits
17 Commits
412a9c7bec
...
0.0.2
| Author | SHA1 | Date | |
|---|---|---|---|
| | b6f64615ae | | |
| | 9e534ed961 | | |
| | 985194cd5b | | |
| | 91eab636bb | | |
| | 4e63c36199 | | |
| | 24b3b04cfe | | |
| | 08136d8a64 | | |
| | 677ead8ef5 | | |
| | 4cf5110684 | | |
| | 77dabf8df8 | | |
| | b14ffe9e44 | | |
| | f22af90e98 | | |
| | dbfef295b8 | | |
| | 505950eafa | | |
| | 3a550ab576 | | |
| | 44d36575c6 | | |
| | b18b717c52 | | |
129
.drone.yml
Normal file
129
.drone.yml
Normal file
@@ -0,0 +1,129 @@
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: build-and-publish-pypi
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
workspace:
|
||||
path: /drone/src
|
||||
|
||||
steps:
|
||||
- name: check-version
|
||||
image: curlimages/curl:latest
|
||||
environment:
|
||||
PIP_REPO_URL:
|
||||
from_secret: PIP_REPO_URL
|
||||
PIP_USERNAME:
|
||||
from_secret: PIP_USERNAME
|
||||
PIP_PASSWORD:
|
||||
from_secret: PIP_PASSWORD
|
||||
commands:
|
||||
- PACKAGE_NAME=$(grep -E '^name\s*=' pyproject.toml | head -1 | cut -d'"' -f2)
|
||||
- VERSION=$(grep -E '^version\s*=' pyproject.toml | head -1 | cut -d'"' -f2)
|
||||
- echo "🔍 Checking if $PACKAGE_NAME==$VERSION exists on $PIP_REPO_URL ..."
|
||||
- |
|
||||
if curl -fsSL -u "$PIP_USERNAME:$PIP_PASSWORD" "$PIP_REPO_URL/simple/$PACKAGE_NAME/" | grep -q "$VERSION"; then
|
||||
echo "✅ $PACKAGE_NAME==$VERSION already exists — skipping build."
|
||||
exit 78
|
||||
else
|
||||
echo "🆕 New version detected: $PACKAGE_NAME==$VERSION"
|
||||
fi
|
||||
|
||||
- name: build-package
|
||||
image: python:3.13-slim
|
||||
commands:
|
||||
- pip install --upgrade pip build
|
||||
- echo "📦 Building Python package..."
|
||||
- python -m build
|
||||
- ls -l dist
|
||||
|
||||
- name: upload-to-private-pypi
|
||||
image: python:3.13-slim
|
||||
environment:
|
||||
PIP_REPO_URL:
|
||||
from_secret: PIP_REPO_URL
|
||||
PIP_USERNAME:
|
||||
from_secret: PIP_USERNAME
|
||||
PIP_PASSWORD:
|
||||
from_secret: PIP_PASSWORD
|
||||
commands:
|
||||
- pip install --upgrade twine
|
||||
- echo "🚀 Uploading to private PyPI at $PIP_REPO_URL ..."
|
||||
- |
|
||||
twine upload \
|
||||
--repository-url "$PIP_REPO_URL" \
|
||||
-u "$PIP_USERNAME" \
|
||||
-p "$PIP_PASSWORD" \
|
||||
dist/*
|
||||
|
||||
trigger:
|
||||
event:
|
||||
- tag
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: backfill-pypi-from-tags
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
workspace:
|
||||
path: /drone/src
|
||||
|
||||
steps:
|
||||
- name: fetch-tags
|
||||
image: alpine/git
|
||||
commands:
|
||||
- git fetch --tags --force
|
||||
|
||||
- name: build-and-upload-missing
|
||||
image: python:3.13-slim
|
||||
environment:
|
||||
PIP_REPO_URL:
|
||||
from_secret: PIP_REPO_URL
|
||||
PIP_USERNAME:
|
||||
from_secret: PIP_USERNAME
|
||||
PIP_PASSWORD:
|
||||
from_secret: PIP_PASSWORD
|
||||
commands:
|
||||
- apt-get update
|
||||
- apt-get install -y git curl ca-certificates
|
||||
- pip install --upgrade pip build twine
|
||||
- |
|
||||
set -e
|
||||
|
||||
PACKAGE_NAME=$(grep -E '^name\s*=' pyproject.toml | cut -d'"' -f2)
|
||||
echo "📦 Package: $PACKAGE_NAME"
|
||||
|
||||
for TAG in $(git tag --sort=version:refname); do
|
||||
VERSION="$TAG"
|
||||
echo "🔁 Version: $VERSION"
|
||||
|
||||
if curl -fsSL -u "$PIP_USERNAME:$PIP_PASSWORD" \
|
||||
"$PIP_REPO_URL/simple/$PACKAGE_NAME/" | grep -q "$VERSION"; then
|
||||
echo "⏭️ Exists, skipping"
|
||||
continue
|
||||
fi
|
||||
|
||||
git checkout --force "$TAG"
|
||||
|
||||
echo "🏗️ Building $VERSION"
|
||||
rm -rf dist
|
||||
python -m build
|
||||
|
||||
echo "⬆️ Uploading $VERSION"
|
||||
twine upload \
|
||||
--repository-url "$PIP_REPO_URL" \
|
||||
-u "$PIP_USERNAME" \
|
||||
-p "$PIP_PASSWORD" \
|
||||
dist/*
|
||||
done
|
||||
|
||||
trigger:
|
||||
event:
|
||||
- custom
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -38,3 +38,8 @@ Thumbs.db
|
||||
*.swo
|
||||
*~
|
||||
*.tmp
|
||||
|
||||
# Credentials
|
||||
client_secret_*.json
|
||||
token.pickle
|
||||
credentials*.json
|
||||
3
docs/mail_intake/adapters/base.md
Normal file
3
docs/mail_intake/adapters/base.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Base
|
||||
|
||||
::: mail_intake.adapters.base
|
||||
3
docs/mail_intake/adapters/gmail.md
Normal file
3
docs/mail_intake/adapters/gmail.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Gmail
|
||||
|
||||
::: mail_intake.adapters.gmail
|
||||
3
docs/mail_intake/adapters/index.md
Normal file
3
docs/mail_intake/adapters/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Adapters
|
||||
|
||||
::: mail_intake.adapters
|
||||
3
docs/mail_intake/auth/base.md
Normal file
3
docs/mail_intake/auth/base.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Base
|
||||
|
||||
::: mail_intake.auth.base
|
||||
3
docs/mail_intake/auth/google.md
Normal file
3
docs/mail_intake/auth/google.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Google
|
||||
|
||||
::: mail_intake.auth.google
|
||||
3
docs/mail_intake/auth/index.md
Normal file
3
docs/mail_intake/auth/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Auth
|
||||
|
||||
::: mail_intake.auth
|
||||
3
docs/mail_intake/config.md
Normal file
3
docs/mail_intake/config.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Config
|
||||
|
||||
::: mail_intake.config
|
||||
3
docs/mail_intake/exceptions.md
Normal file
3
docs/mail_intake/exceptions.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Exceptions
|
||||
|
||||
::: mail_intake.exceptions
|
||||
3
docs/mail_intake/index.md
Normal file
3
docs/mail_intake/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Mail Intake
|
||||
|
||||
::: mail_intake
|
||||
3
docs/mail_intake/ingestion/index.md
Normal file
3
docs/mail_intake/ingestion/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Ingestion
|
||||
|
||||
::: mail_intake.ingestion
|
||||
3
docs/mail_intake/ingestion/reader.md
Normal file
3
docs/mail_intake/ingestion/reader.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Reader
|
||||
|
||||
::: mail_intake.ingestion.reader
|
||||
3
docs/mail_intake/models/index.md
Normal file
3
docs/mail_intake/models/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Models
|
||||
|
||||
::: mail_intake.models
|
||||
3
docs/mail_intake/models/message.md
Normal file
3
docs/mail_intake/models/message.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Message
|
||||
|
||||
::: mail_intake.models.message
|
||||
3
docs/mail_intake/models/thread.md
Normal file
3
docs/mail_intake/models/thread.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Thread
|
||||
|
||||
::: mail_intake.models.thread
|
||||
3
docs/mail_intake/parsers/body.md
Normal file
3
docs/mail_intake/parsers/body.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Body
|
||||
|
||||
::: mail_intake.parsers.body
|
||||
3
docs/mail_intake/parsers/headers.md
Normal file
3
docs/mail_intake/parsers/headers.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Headers
|
||||
|
||||
::: mail_intake.parsers.headers
|
||||
3
docs/mail_intake/parsers/index.md
Normal file
3
docs/mail_intake/parsers/index.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Parsers
|
||||
|
||||
::: mail_intake.parsers
|
||||
3
docs/mail_intake/parsers/subject.md
Normal file
3
docs/mail_intake/parsers/subject.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Subject
|
||||
|
||||
::: mail_intake.parsers.subject
|
||||
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Mail Intake — provider-agnostic, read-only email ingestion framework.
|
||||
|
||||
Mail Intake is a **contract-first library** designed to ingest, parse, and
|
||||
normalize email data from external providers (such as Gmail) into clean,
|
||||
provider-agnostic domain models.
|
||||
|
||||
The library is intentionally structured around clear layers, each exposed
|
||||
as a first-class module at the package root:
|
||||
|
||||
- adapters: provider-specific access (e.g. Gmail)
|
||||
- auth: authentication providers and credential lifecycle management
|
||||
- credentials: credential persistence abstractions and implementations
|
||||
- parsers: extraction and normalization of message content
|
||||
- ingestion: orchestration and high-level ingestion workflows
|
||||
- models: canonical, provider-agnostic data representations
|
||||
- config: explicit global configuration
|
||||
- exceptions: library-defined error hierarchy
|
||||
|
||||
The package root acts as a **namespace**, not a facade. Consumers are
|
||||
expected to import functionality explicitly from the appropriate module.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Installation
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Install using pip:
|
||||
|
||||
pip install mail-intake
|
||||
|
||||
Or with Poetry:
|
||||
|
||||
poetry add mail-intake
|
||||
|
||||
Mail Intake is pure Python and has no runtime dependencies beyond those
|
||||
required by the selected provider (for example, Google APIs for Gmail).
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Basic Usage
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Minimal Gmail ingestion example (local development):
|
||||
|
||||
from mail_intake.ingestion import MailIntakeReader
|
||||
from mail_intake.adapters import MailIntakeGmailAdapter
|
||||
from mail_intake.auth import MailIntakeGoogleAuth
|
||||
from mail_intake.credentials import PickleCredentialStore
|
||||
|
||||
store = PickleCredentialStore(path="token.pickle")
|
||||
|
||||
auth = MailIntakeGoogleAuth(
|
||||
credentials_path="credentials.json",
|
||||
store=store,
|
||||
scopes=["https://www.googleapis.com/auth/gmail.readonly"],
|
||||
)
|
||||
|
||||
adapter = MailIntakeGmailAdapter(auth_provider=auth)
|
||||
reader = MailIntakeReader(adapter)
|
||||
|
||||
for message in reader.iter_messages("from:recruiter@example.com"):
|
||||
print(message.subject, message.from_email)
|
||||
|
||||
Iterating over threads:
|
||||
|
||||
for thread in reader.iter_threads("subject:Interview"):
|
||||
print(thread.normalized_subject, len(thread.messages))
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Extensibility Model
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Mail Intake is designed to be extensible via **public contracts** exposed
|
||||
through its modules:
|
||||
|
||||
- Users MAY implement their own mail adapters by subclassing
|
||||
``adapters.MailIntakeAdapter``
|
||||
- Users MAY implement their own authentication providers by subclassing
|
||||
``auth.MailIntakeAuthProvider[T]``
|
||||
- Users MAY implement their own credential persistence layers by
|
||||
implementing ``credentials.CredentialStore[T]``
|
||||
|
||||
Users SHOULD NOT subclass built-in adapter implementations. Built-in
|
||||
adapters (such as Gmail) are reference implementations and may change
|
||||
internally without notice.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Public API Surface
|
||||
----------------------------------------------------------------------
|
||||
|
||||
The supported public API consists of the following top-level modules:
|
||||
|
||||
- mail_intake.ingestion
|
||||
- mail_intake.adapters
|
||||
- mail_intake.auth
|
||||
- mail_intake.credentials
|
||||
- mail_intake.parsers
|
||||
- mail_intake.models
|
||||
- mail_intake.config
|
||||
- mail_intake.exceptions
|
||||
|
||||
Classes and functions should be imported explicitly from these modules.
|
||||
No individual symbols are re-exported at the package root.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Design Guarantees
|
||||
----------------------------------------------------------------------
|
||||
|
||||
- Read-only access: no mutation of provider state
|
||||
- Provider-agnostic domain models
|
||||
- Explicit configuration and dependency injection
|
||||
- No implicit global state or environment reads
|
||||
- Deterministic, testable behavior
|
||||
- Authentication design that is safe for distributed deployments
|
||||
|
||||
Mail Intake favors correctness, clarity, and explicitness over convenience
|
||||
shortcuts.
|
||||
"""
|
||||
|
||||
|
||||
from . import ingestion
|
||||
from . import adapters
|
||||
from . import auth
|
||||
from . import credentials
|
||||
from . import models
|
||||
from . import config
|
||||
from . import exceptions
|
||||
|
||||
__all__ = [
|
||||
"ingestion",
|
||||
"adapters",
|
||||
"auth",
|
||||
"credentials",
|
||||
"models",
|
||||
"config",
|
||||
"exceptions",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
Mail provider adapter implementations for Mail Intake.
|
||||
|
||||
This package contains **adapter-layer implementations** responsible for
|
||||
interfacing with external mail providers and exposing a normalized,
|
||||
provider-agnostic contract to the rest of the system.
|
||||
|
||||
Adapters in this package:
|
||||
- Implement the `MailIntakeAdapter` interface
|
||||
- Encapsulate all provider-specific APIs and semantics
|
||||
- Perform read-only access to mail data
|
||||
- Return provider-native payloads without interpretation
|
||||
|
||||
Provider-specific logic **must not leak** outside of adapter implementations.
|
||||
All parsing, normalization, and transformation must be handled by downstream
|
||||
components.
|
||||
|
||||
Public adapters exported from this package are considered the supported
|
||||
integration surface for mail providers.
|
||||
"""
|
||||
|
||||
from .base import MailIntakeAdapter
|
||||
from .gmail import MailIntakeGmailAdapter
|
||||
|
||||
__all__ = [
|
||||
"MailIntakeAdapter",
|
||||
"MailIntakeGmailAdapter",
|
||||
]
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
"""
|
||||
Mail provider adapter contracts for Mail Intake.
|
||||
|
||||
This module defines the **provider-agnostic adapter interface** used for
|
||||
read-only mail ingestion.
|
||||
|
||||
Adapters encapsulate all provider-specific access logic and expose a
|
||||
minimal, normalized contract to the rest of the system. No provider-specific
|
||||
types or semantics should leak beyond implementations of this interface.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Iterator, Dict, Any
|
||||
|
||||
@@ -6,19 +17,28 @@ class MailIntakeAdapter(ABC):
|
||||
"""
|
||||
Base adapter interface for mail providers.
|
||||
|
||||
This interface defines the minimal contract required for
|
||||
read-only mail ingestion. No provider-specific concepts
|
||||
should leak beyond implementations of this class.
|
||||
This interface defines the minimal contract required to:
|
||||
- Discover messages matching a query
|
||||
- Retrieve full message payloads
|
||||
- Retrieve full thread payloads
|
||||
|
||||
Adapters are intentionally read-only and must not mutate provider state.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def iter_message_refs(self, query: str) -> Iterator[Dict[str, str]]:
|
||||
"""
|
||||
Iterate over lightweight message references.
|
||||
Iterate over lightweight message references matching a query.
|
||||
|
||||
Must yield dictionaries containing at least:
|
||||
- message_id
|
||||
- thread_id
|
||||
Implementations must yield dictionaries containing at least:
|
||||
- ``message_id``: Provider-specific message identifier
|
||||
- ``thread_id``: Provider-specific thread identifier
|
||||
|
||||
Args:
|
||||
query: Provider-specific query string used to filter messages.
|
||||
|
||||
Yields:
|
||||
Dictionaries containing message and thread identifiers.
|
||||
|
||||
Example yield:
|
||||
{
|
||||
@@ -31,18 +51,26 @@ class MailIntakeAdapter(ABC):
|
||||
@abstractmethod
|
||||
def fetch_message(self, message_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch a full raw message by message_id.
|
||||
Fetch a full raw message by message identifier.
|
||||
|
||||
Returns the provider-native message payload
|
||||
(e.g., Gmail message JSON).
|
||||
Args:
|
||||
message_id: Provider-specific message identifier.
|
||||
|
||||
Returns:
|
||||
Provider-native message payload
|
||||
(e.g., Gmail message JSON structure).
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def fetch_thread(self, thread_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch a full raw thread by thread_id.
|
||||
Fetch a full raw thread by thread identifier.
|
||||
|
||||
Returns the provider-native thread payload.
|
||||
Args:
|
||||
thread_id: Provider-specific thread identifier.
|
||||
|
||||
Returns:
|
||||
Provider-native thread payload.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,3 +1,17 @@
|
||||
"""
|
||||
Gmail adapter implementation for Mail Intake.
|
||||
|
||||
This module provides a **Gmail-specific implementation** of the
|
||||
`MailIntakeAdapter` contract.
|
||||
|
||||
It is the only place in the codebase where:
|
||||
- `googleapiclient` is imported
|
||||
- Gmail REST API semantics are known
|
||||
- Low-level `.execute()` calls are made
|
||||
|
||||
All Gmail-specific behavior must be strictly contained within this module.
|
||||
"""
|
||||
|
||||
from typing import Iterator, Dict, Any
|
||||
|
||||
from googleapiclient.discovery import build
|
||||
@@ -12,12 +26,19 @@ class MailIntakeGmailAdapter(MailIntakeAdapter):
|
||||
"""
|
||||
Gmail read-only adapter.
|
||||
|
||||
This adapter implements the `MailIntakeAdapter` interface using the
|
||||
Gmail REST API. It translates the generic mail intake contract into
|
||||
Gmail-specific API calls.
|
||||
|
||||
This class is the ONLY place where:
|
||||
- googleapiclient is imported
|
||||
- Gmail REST semantics are known
|
||||
- .execute() is called
|
||||
|
||||
It must remain thin and dumb by design.
|
||||
Design constraints:
|
||||
- Must remain thin and imperative
|
||||
- Must not perform parsing or interpretation
|
||||
- Must not expose Gmail-specific types beyond this class
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -25,12 +46,29 @@ class MailIntakeGmailAdapter(MailIntakeAdapter):
|
||||
auth_provider: MailIntakeAuthProvider,
|
||||
user_id: str = "me",
|
||||
):
|
||||
"""
|
||||
Initialize the Gmail adapter.
|
||||
|
||||
Args:
|
||||
auth_provider: Authentication provider capable of supplying
|
||||
valid Gmail API credentials.
|
||||
user_id: Gmail user identifier. Defaults to `"me"`.
|
||||
"""
|
||||
self._auth_provider = auth_provider
|
||||
self._user_id = user_id
|
||||
self._service = None
|
||||
|
||||
@property
|
||||
def service(self):
|
||||
"""
|
||||
Lazily initialize and return the Gmail API service client.
|
||||
|
||||
Returns:
|
||||
Initialized Gmail API service instance.
|
||||
|
||||
Raises:
|
||||
MailIntakeAdapterError: If the Gmail service cannot be initialized.
|
||||
"""
|
||||
if self._service is None:
|
||||
try:
|
||||
creds = self._auth_provider.get_credentials()
|
||||
@@ -45,11 +83,16 @@ class MailIntakeGmailAdapter(MailIntakeAdapter):
|
||||
"""
|
||||
Iterate over message references matching the query.
|
||||
|
||||
Args:
|
||||
query: Gmail search query string.
|
||||
|
||||
Yields:
|
||||
{
|
||||
"message_id": "...",
|
||||
"thread_id": "..."
|
||||
}
|
||||
Dictionaries containing:
|
||||
- ``message_id``: Gmail message ID
|
||||
- ``thread_id``: Gmail thread ID
|
||||
|
||||
Raises:
|
||||
MailIntakeAdapterError: If the Gmail API returns an error.
|
||||
"""
|
||||
try:
|
||||
request = (
|
||||
@@ -79,6 +122,18 @@ class MailIntakeGmailAdapter(MailIntakeAdapter):
|
||||
) from exc
|
||||
|
||||
def fetch_message(self, message_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch a full Gmail message by message ID.
|
||||
|
||||
Args:
|
||||
message_id: Gmail message identifier.
|
||||
|
||||
Returns:
|
||||
Provider-native Gmail message payload.
|
||||
|
||||
Raises:
|
||||
MailIntakeAdapterError: If the Gmail API returns an error.
|
||||
"""
|
||||
try:
|
||||
return (
|
||||
self.service.users()
|
||||
@@ -92,6 +147,18 @@ class MailIntakeGmailAdapter(MailIntakeAdapter):
|
||||
) from exc
|
||||
|
||||
def fetch_thread(self, thread_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch a full Gmail thread by thread ID.
|
||||
|
||||
Args:
|
||||
thread_id: Gmail thread identifier.
|
||||
|
||||
Returns:
|
||||
Provider-native Gmail thread payload.
|
||||
|
||||
Raises:
|
||||
MailIntakeAdapterError: If the Gmail API returns an error.
|
||||
"""
|
||||
try:
|
||||
return (
|
||||
self.service.users()
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Authentication provider implementations for Mail Intake.
|
||||
|
||||
This package defines the **authentication layer** used by mail adapters
|
||||
to obtain provider-specific credentials.
|
||||
|
||||
It exposes:
|
||||
- A stable, provider-agnostic authentication contract
|
||||
- Concrete authentication providers for supported platforms
|
||||
|
||||
Authentication providers:
|
||||
- Are responsible for credential acquisition and lifecycle management
|
||||
- Are intentionally decoupled from adapter logic
|
||||
- May be extended by users to support additional providers
|
||||
|
||||
Consumers should depend on the abstract interface and use concrete
|
||||
implementations only where explicitly required.
|
||||
"""
|
||||
|
||||
from .base import MailIntakeAuthProvider
|
||||
from .google import MailIntakeGoogleAuth
|
||||
|
||||
__all__ = [
|
||||
"MailIntakeAuthProvider",
|
||||
"MailIntakeGoogleAuth",
|
||||
]
|
||||
|
||||
@@ -1,20 +1,59 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class MailIntakeAuthProvider(ABC):
|
||||
"""
|
||||
Abstract authentication provider.
|
||||
Authentication provider contracts for Mail Intake.
|
||||
|
||||
Mail adapters depend on this interface, not on concrete
|
||||
OAuth or credential implementations.
|
||||
This module defines the **authentication abstraction layer** used by mail
|
||||
adapters to obtain provider-specific credentials.
|
||||
|
||||
Authentication concerns are intentionally decoupled from adapter logic.
|
||||
Adapters depend only on this interface and must not be aware of how
|
||||
credentials are acquired, refreshed, or persisted.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class MailIntakeAuthProvider(ABC, Generic[T]):
|
||||
"""
|
||||
Abstract base class for authentication providers.
|
||||
|
||||
This interface enforces a strict contract between authentication
|
||||
providers and mail adapters by requiring providers to explicitly
|
||||
declare the type of credentials they return.
|
||||
|
||||
Authentication providers encapsulate all logic required to:
|
||||
- Acquire credentials from an external provider
|
||||
- Refresh or revalidate credentials as needed
|
||||
- Handle authentication-specific failure modes
|
||||
- Coordinate with credential persistence layers where applicable
|
||||
|
||||
Mail adapters must treat returned credentials as opaque and
|
||||
provider-specific, relying only on the declared credential type
|
||||
expected by the adapter.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_credentials(self):
|
||||
def get_credentials(self) -> T:
|
||||
"""
|
||||
Return provider-specific credentials object.
|
||||
Retrieve valid, provider-specific credentials.
|
||||
|
||||
This method is synchronous by design and must either
|
||||
return valid credentials or raise MailIntakeAuthError.
|
||||
This method is synchronous by design and represents the sole
|
||||
entry point through which adapters obtain authentication
|
||||
material.
|
||||
|
||||
Implementations must either return credentials of the declared
|
||||
type ``T`` that are valid at the time of return or raise an
|
||||
authentication-specific exception.
|
||||
|
||||
Returns:
|
||||
Credentials of type ``T`` suitable for immediate use by the
|
||||
corresponding mail adapter.
|
||||
|
||||
Raises:
|
||||
Exception:
|
||||
An authentication-specific exception indicating that
|
||||
credentials could not be obtained or validated.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,12 +1,28 @@
|
||||
"""
|
||||
Google authentication provider implementation for Mail Intake.
|
||||
|
||||
This module provides a **Google OAuth–based authentication provider**
|
||||
used primarily for Gmail access.
|
||||
|
||||
It encapsulates all Google-specific authentication concerns, including:
|
||||
- Credential loading and persistence
|
||||
- Token refresh handling
|
||||
- Interactive OAuth flow initiation
|
||||
- Coordination with a credential persistence layer
|
||||
|
||||
No Google authentication details should leak outside this module.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pickle
|
||||
from typing import Sequence
|
||||
|
||||
import google.auth.exceptions
|
||||
from google.auth.transport.requests import Request
|
||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||
from google.oauth2.credentials import Credentials
|
||||
|
||||
from mail_intake.auth.base import MailIntakeAuthProvider
|
||||
from mail_intake.credentials.store import CredentialStore
|
||||
from mail_intake.exceptions import MailIntakeAuthError
|
||||
|
||||
|
||||
@@ -14,34 +30,62 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider):
|
||||
"""
|
||||
Google OAuth provider for Gmail access.
|
||||
|
||||
Responsibilities:
|
||||
- Load cached credentials from disk
|
||||
- Refresh expired tokens when possible
|
||||
- Trigger interactive login only when strictly required
|
||||
This provider implements the `MailIntakeAuthProvider` interface using
|
||||
Google's OAuth 2.0 flow and credential management libraries.
|
||||
|
||||
This class is synchronous and intentionally state-light.
|
||||
Responsibilities:
|
||||
- Load cached credentials from a credential store when available
|
||||
- Refresh expired credentials when possible
|
||||
- Initiate an interactive OAuth flow only when required
|
||||
- Persist refreshed or newly obtained credentials via the store
|
||||
|
||||
This class is synchronous by design and maintains minimal internal state.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
credentials_path: str,
|
||||
token_path: str,
|
||||
store: CredentialStore[Credentials],
|
||||
scopes: Sequence[str],
|
||||
):
|
||||
"""
|
||||
Initialize the Google authentication provider.
|
||||
|
||||
Args:
|
||||
credentials_path:
|
||||
Path to the Google OAuth client secrets file used to
|
||||
initiate the OAuth 2.0 flow.
|
||||
|
||||
store:
|
||||
Credential store responsible for persisting and
|
||||
retrieving Google OAuth credentials.
|
||||
|
||||
scopes:
|
||||
OAuth scopes required for Gmail access.
|
||||
"""
|
||||
self.credentials_path = credentials_path
|
||||
self.token_path = token_path
|
||||
self.store = store
|
||||
self.scopes = list(scopes)
|
||||
|
||||
def get_credentials(self):
|
||||
creds = None
|
||||
def get_credentials(self) -> Credentials:
|
||||
"""
|
||||
Retrieve valid Google OAuth credentials.
|
||||
|
||||
# Attempt to load cached credentials
|
||||
if os.path.exists(self.token_path):
|
||||
try:
|
||||
with open(self.token_path, "rb") as fh:
|
||||
creds = pickle.load(fh)
|
||||
except Exception:
|
||||
creds = None
|
||||
This method attempts to:
|
||||
1. Load cached credentials from the configured credential store
|
||||
2. Refresh expired credentials when possible
|
||||
3. Perform an interactive OAuth login as a fallback
|
||||
4. Persist valid credentials for future use
|
||||
|
||||
Returns:
|
||||
A ``google.oauth2.credentials.Credentials`` instance suitable
|
||||
for use with Google API clients.
|
||||
|
||||
Raises:
|
||||
MailIntakeAuthError: If credentials cannot be loaded, refreshed,
|
||||
or obtained via interactive authentication.
|
||||
"""
|
||||
creds = self.store.load()
|
||||
|
||||
# Validate / refresh credentials
|
||||
if not creds or not creds.valid:
|
||||
@@ -49,6 +93,7 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider):
|
||||
try:
|
||||
creds.refresh(Request())
|
||||
except google.auth.exceptions.RefreshError:
|
||||
self.store.clear()
|
||||
creds = None
|
||||
|
||||
# Interactive login if refresh failed or creds missing
|
||||
@@ -69,13 +114,12 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider):
|
||||
"Failed to complete Google OAuth flow"
|
||||
) from exc
|
||||
|
||||
# Persist refreshed / new credentials
|
||||
# Persist refreshed or newly obtained credentials
|
||||
try:
|
||||
with open(self.token_path, "wb") as fh:
|
||||
pickle.dump(creds, fh)
|
||||
self.store.save(creds)
|
||||
except Exception as exc:
|
||||
raise MailIntakeAuthError(
|
||||
f"Failed to write token file: {self.token_path}"
|
||||
"Failed to persist Google OAuth credentials"
|
||||
) from exc
|
||||
|
||||
return creds
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
"""
|
||||
Global configuration models for Mail Intake.
|
||||
|
||||
This module defines the **top-level configuration object** used to control
|
||||
mail ingestion behavior across adapters, authentication providers, and
|
||||
ingestion workflows.
|
||||
|
||||
Configuration is intentionally explicit, immutable, and free of implicit
|
||||
environment reads to ensure predictability and testability.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
@@ -9,12 +20,26 @@ class MailIntakeConfig:
|
||||
|
||||
This configuration is intentionally explicit and immutable.
|
||||
No implicit environment reads or global state.
|
||||
|
||||
Design principles:
|
||||
- Immutable once constructed
|
||||
- Explicit configuration over implicit defaults
|
||||
- No direct environment or filesystem access
|
||||
|
||||
This model is safe to pass across layers and suitable for serialization.
|
||||
"""
|
||||
|
||||
provider: str = "gmail"
|
||||
user_id: str = "me"
|
||||
readonly: bool = True
|
||||
"""Identifier of the mail provider to use (e.g., ``"gmail"``)."""
|
||||
|
||||
user_id: str = "me"
|
||||
"""Provider-specific user identifier. Defaults to the authenticated user."""
|
||||
|
||||
readonly: bool = True
|
||||
"""Whether ingestion should operate in read-only mode."""
|
||||
|
||||
# Provider-specific paths (optional at this layer)
|
||||
credentials_path: Optional[str] = None
|
||||
"""Optional path to provider credentials configuration."""
|
||||
|
||||
token_path: Optional[str] = None
|
||||
"""Optional path to persisted authentication tokens."""
|
||||
|
||||
29
mail_intake/credentials/__init__.py
Normal file
29
mail_intake/credentials/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
Credential persistence interfaces and implementations for Mail Intake.
|
||||
|
||||
This package defines the abstractions and concrete implementations used
|
||||
to persist authentication credentials across Mail Intake components.
|
||||
|
||||
The credential persistence layer is intentionally decoupled from
|
||||
authentication logic. Authentication providers are responsible for
|
||||
credential acquisition, validation, and refresh, while implementations
|
||||
within this package are responsible solely for storage and retrieval.
|
||||
|
||||
The package provides:
|
||||
- A generic ``CredentialStore`` abstraction defining the persistence contract
|
||||
- Local filesystem–based storage for development and single-node use
|
||||
- Distributed, Redis-backed storage for production and scaled deployments
|
||||
|
||||
Credential lifecycle management, interpretation, and security policy
|
||||
decisions remain the responsibility of authentication providers.
|
||||
"""
|
||||
|
||||
from mail_intake.credentials.store import CredentialStore
|
||||
from mail_intake.credentials.pickle import PickleCredentialStore
|
||||
from mail_intake.credentials.redis import RedisCredentialStore
|
||||
|
||||
__all__ = [
|
||||
"CredentialStore",
|
||||
"PickleCredentialStore",
|
||||
"RedisCredentialStore",
|
||||
]
|
||||
96
mail_intake/credentials/pickle.py
Normal file
96
mail_intake/credentials/pickle.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Local filesystem–based credential persistence for Mail Intake.
|
||||
|
||||
This module provides a file-backed implementation of the
|
||||
``CredentialStore`` abstraction using Python's ``pickle`` module.
|
||||
|
||||
The pickle-based credential store is intended for local development,
|
||||
single-node deployments, and controlled environments where credentials
|
||||
do not need to be shared across processes or machines.
|
||||
|
||||
Due to the security and portability risks associated with pickle-based
|
||||
serialization, this implementation is not suitable for distributed or
|
||||
untrusted environments.
|
||||
"""
|
||||
|
||||
import pickle
|
||||
from typing import Optional, TypeVar
|
||||
|
||||
from mail_intake.credentials.store import CredentialStore
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class PickleCredentialStore(CredentialStore[T]):
    """
    Filesystem-backed credential store using pickle serialization.

    This store persists credentials as a single pickled object on the
    local filesystem. It is a simple implementation intended primarily
    for development, testing, and single-process execution contexts.

    This implementation:
    - Stores credentials on the local filesystem
    - Uses pickle for serialization and deserialization
    - Does not provide encryption, locking, or concurrency guarantees

    Credential lifecycle management, validation, and refresh logic are
    explicitly out of scope for this class.
    """

    def __init__(self, path: str):
        """
        Initialize a pickle-backed credential store.

        Args:
            path:
                Filesystem path where credentials will be stored.
                The file will be created or overwritten as needed.
        """
        self.path = path

    def load(self) -> Optional[T]:
        """
        Load credentials from the local filesystem.

        If the credential file does not exist or cannot be successfully
        deserialized, this method returns ``None``.

        The store does not attempt to validate or interpret the returned
        credentials.

        Returns:
            An instance of type ``T`` if credentials are present and
            successfully deserialized; otherwise ``None``.
        """
        try:
            with open(self.path, "rb") as fh:
                # NOTE(security): unpickling can run code embedded in the
                # payload, so the file at ``self.path`` must only be
                # writable by trusted parties.
                return pickle.load(fh)
        except Exception:
            # Deliberately broad: the store contract is "return None when
            # credentials are absent or cannot be decoded", and pickle may
            # raise many different exception types on corrupt input.
            return None

    def save(self, credentials: T) -> None:
        """
        Persist credentials to the local filesystem.

        Any previously stored credentials at the configured path are
        overwritten.

        Args:
            credentials:
                The credential object to persist.

        Raises:
            pickle.PicklingError: If ``credentials`` cannot be pickled
                (other exception types are possible, depending on the
                object). The existing file is left untouched in that case.
            OSError: If the file cannot be opened or written.
        """
        # Serialize before opening the file: opening with "wb" truncates
        # it, so a pickling failure after that point would wipe out the
        # previously stored credentials.
        payload = pickle.dumps(credentials)
        with open(self.path, "wb") as fh:
            fh.write(payload)

    def clear(self) -> None:
        """
        Remove persisted credentials from the local filesystem.

        This method deletes the credential file if it exists and is
        idempotent: clearing an already-empty store is not an error.
        """
        import os

        # Attempt the removal directly instead of checking for existence
        # first; the file may disappear between the check and the removal.
        try:
            os.remove(self.path)
        except FileNotFoundError:
            pass
|
||||
142
mail_intake/credentials/redis.py
Normal file
142
mail_intake/credentials/redis.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Redis-backed credential persistence for Mail Intake.
|
||||
|
||||
This module provides a Redis-based implementation of the
|
||||
``CredentialStore`` abstraction, enabling credential persistence
|
||||
across distributed and horizontally scaled deployments.
|
||||
|
||||
The Redis credential store is designed for environments where
|
||||
authentication credentials must be shared safely across multiple
|
||||
processes, containers, or nodes, such as container orchestration
|
||||
platforms and microservice architectures.
|
||||
|
||||
Key characteristics:
|
||||
- Distributed-safe, shared storage using Redis
|
||||
- Explicit, caller-defined serialization and deserialization
|
||||
- No reliance on unsafe mechanisms such as pickle
|
||||
- Optional time-to-live (TTL) support for automatic credential expiry
|
||||
|
||||
This module is responsible solely for persistence concerns.
|
||||
Credential validation, refresh, rotation, and acquisition remain the
|
||||
responsibility of authentication provider implementations.
|
||||
"""
|
||||
|
||||
|
||||
from typing import Optional, TypeVar, Callable
|
||||
|
||||
from mail_intake.credentials.store import CredentialStore
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class RedisCredentialStore(CredentialStore[T]):
    """
    Redis-backed implementation of ``CredentialStore``.

    This store persists credentials in Redis and is suitable for
    distributed and horizontally scaled deployments where credentials
    must be shared across multiple processes or nodes.

    The store is intentionally generic and delegates all serialization
    concerns to caller-provided functions. This avoids unsafe mechanisms
    such as pickle and allows credential formats to be explicitly
    controlled and audited.

    This class is responsible only for persistence and retrieval.
    It does not interpret, validate, refresh, or otherwise manage
    the lifecycle of the credentials being stored.
    """

    def __init__(
        self,
        redis_client,
        key: str,
        serialize: Callable[[T], bytes],
        deserialize: Callable[[bytes], T],
        ttl_seconds: Optional[int] = None,
    ):
        """
        Initialize a Redis-backed credential store.

        Args:
            redis_client:
                An initialized Redis client instance (for example,
                ``redis.Redis`` or a compatible interface) used to
                communicate with the Redis server. Only its ``get``,
                ``set``, ``setex`` and ``delete`` methods are used.

            key:
                The Redis key under which credentials are stored.
                Callers are responsible for applying appropriate
                namespacing to avoid collisions.

            serialize:
                A callable that converts a credential object of type
                ``T`` into a ``bytes`` representation suitable for
                storage in Redis.

            deserialize:
                A callable that converts a ``bytes`` payload retrieved
                from Redis back into a credential object of type ``T``.

            ttl_seconds:
                Optional time-to-live (TTL) for the stored credentials,
                expressed in seconds. When provided, Redis will
                automatically expire the stored credentials after the
                specified duration. If ``None`` (or ``0``), credentials
                are stored without an expiration.
        """
        self.redis = redis_client
        self.key = key
        self.serialize = serialize
        self.deserialize = deserialize
        self.ttl_seconds = ttl_seconds

    def load(self) -> Optional[T]:
        """
        Load credentials from Redis.

        If no value exists for the configured key, or if the stored
        payload cannot be successfully deserialized, this method
        returns ``None``.

        The store does not attempt to validate the returned credentials
        or determine whether they are expired or otherwise usable.

        Returns:
            An instance of type ``T`` if credentials are present and
            successfully deserialized; otherwise ``None``.
        """
        raw = self.redis.get(self.key)
        # Only a missing key means "no credentials". An existing but empty
        # payload is still passed to ``deserialize`` so that the caller's
        # format, not this store, decides whether it is valid.
        if raw is None:
            return None
        try:
            return self.deserialize(raw)
        except Exception:
            # Deliberately broad: ``deserialize`` is caller-supplied and may
            # raise anything; the contract is to report undecodable payloads
            # as "no credentials".
            return None

    def save(self, credentials: T) -> None:
        """
        Persist credentials to Redis.

        Any previously stored credentials under the same key are
        overwritten. If a TTL is configured, the credentials will
        expire automatically after the specified duration.

        Args:
            credentials:
                The credential object to persist.
        """
        payload = self.serialize(credentials)
        # A falsy TTL (``None`` or ``0``) means "store without expiry".
        if self.ttl_seconds:
            self.redis.setex(self.key, self.ttl_seconds, payload)
        else:
            self.redis.set(self.key, payload)

    def clear(self) -> None:
        """
        Remove stored credentials from Redis.

        This operation deletes the configured Redis key. It is intended
        to be idempotent; presumably the client's ``delete`` is a no-op
        for a missing key (confirm for non-``redis-py`` clients).
        """
        self.redis.delete(self.key)
|
||||
96
mail_intake/credentials/store.py
Normal file
96
mail_intake/credentials/store.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Credential persistence abstractions for Mail Intake.
|
||||
|
||||
This module defines the generic persistence contract used to store and
|
||||
retrieve authentication credentials across Mail Intake components.
|
||||
|
||||
The ``CredentialStore`` abstraction establishes a strict separation
|
||||
between credential *lifecycle management* and credential *storage*.
|
||||
Authentication providers are responsible for acquiring, validating,
|
||||
refreshing, and revoking credentials, while concrete store
|
||||
implementations are responsible solely for persistence concerns.
|
||||
|
||||
By remaining agnostic to credential structure, serialization format,
|
||||
and storage backend, this module enables multiple persistence
|
||||
strategies—such as local files, in-memory caches, distributed stores,
|
||||
or secrets managers—without coupling authentication logic to any
|
||||
specific storage mechanism.
|
||||
"""
|
||||
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Generic, Optional, TypeVar
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class CredentialStore(ABC, Generic[T]):
    """
    Generic, abstract persistence contract for authentication credentials.

    A ``CredentialStore`` only knows how to keep a credential object
    somewhere and hand it back later. Everything about the credential's
    *lifecycle* (creation, validation, refresh, revocation) stays with
    the authentication provider that owns the store.

    Implementations are free to choose, and this interface makes no
    assumption about:
    - The concrete credential type being stored
    - The serialization format used to persist credentials
    - The underlying storage backend or its durability guarantees

    Type Parameters:
        T:
            The concrete credential type managed by the store, for
            example OAuth credentials, API tokens, session objects,
            or any other authentication material.
    """

    @abstractmethod
    def load(self) -> Optional[T]:
        """
        Return the previously persisted credentials, if any.

        Implementations should return ``None`` both when nothing has
        been stored and when the stored data cannot be decoded or
        deserialized.

        The store must hand the credentials back as-is: no validation,
        refresh, or other interpretation is performed here.

        Returns:
            An instance of type ``T`` when credentials are available
            and loadable; otherwise ``None``.
        """

    @abstractmethod
    def save(self, credentials: T) -> None:
        """
        Write credentials to the underlying storage backend.

        Called when credentials have just been obtained or refreshed
        and are known to be valid at the time of persistence.

        Implementations are responsible for:
        - Ensuring durability appropriate to the deployment context
        - Applying encryption or access controls where required
        - Overwriting any previously stored credentials

        Args:
            credentials:
                The credential object to persist.
        """

    @abstractmethod
    def clear(self) -> None:
        """
        Delete any persisted credentials from the store.

        Called when credentials are known to be invalid, revoked,
        corrupted, or otherwise unusable. After this call no stale
        authentication material may remain accessible.

        Implementations should treat this operation as idempotent.
        """
|
||||
@@ -1,19 +1,49 @@
|
||||
"""
|
||||
Exception hierarchy for Mail Intake.
|
||||
|
||||
This module defines the **canonical exception types** used throughout the
|
||||
Mail Intake library.
|
||||
|
||||
All library-raised errors derive from `MailIntakeError`. Consumers are
|
||||
encouraged to catch this base type (or specific subclasses) rather than
|
||||
provider-specific or third-party exceptions.
|
||||
"""
|
||||
|
||||
|
||||
class MailIntakeError(Exception):
    """
    Base exception for all Mail Intake errors.

    This is the root of the Mail Intake exception hierarchy.
    All errors raised by the library must derive from this class.

    Consumers should generally catch this type (or one of its
    subclasses) when handling library-level failures, instead of
    provider-specific or third-party exceptions.
    """
|
||||
|
||||
|
||||
class MailIntakeAuthError(MailIntakeError):
    """
    Authentication and credential-related failures.

    Raised when authentication providers are unable to acquire,
    refresh, or persist valid credentials.
    """
|
||||
|
||||
|
||||
class MailIntakeAdapterError(MailIntakeError):
    """
    Errors raised by mail provider adapters.

    Raised when a provider adapter encounters API errors,
    transport failures, or invalid provider responses.
    """
|
||||
|
||||
|
||||
class MailIntakeParsingError(MailIntakeError):
    """
    Errors encountered while parsing message content.

    Raised when raw provider payloads cannot be interpreted
    or normalized into internal domain models.
    """
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Mail ingestion orchestration for Mail Intake.
|
||||
|
||||
This package contains **high-level ingestion components** responsible for
|
||||
coordinating mail retrieval, parsing, normalization, and model construction.
|
||||
|
||||
It represents the **top of the ingestion pipeline** and is intended to be the
|
||||
primary interaction surface for library consumers.
|
||||
|
||||
Components in this package:
|
||||
- Are provider-agnostic
|
||||
- Depend only on adapter and parser contracts
|
||||
- Contain no provider-specific API logic
|
||||
- Expose read-only ingestion workflows
|
||||
|
||||
Consumers are expected to construct a mail adapter and pass it to the
|
||||
ingestion layer to begin processing messages and threads.
|
||||
"""
|
||||
|
||||
from .reader import MailIntakeReader
|
||||
|
||||
# Public API of the ingestion package: the names exported by a
# star-import of this package.
__all__ = [
    "MailIntakeReader",
]
|
||||
|
||||
@@ -1,3 +1,18 @@
|
||||
"""
|
||||
High-level mail ingestion orchestration for Mail Intake.
|
||||
|
||||
This module provides the primary, provider-agnostic entry point for
|
||||
reading and processing mail data.
|
||||
|
||||
It coordinates:
|
||||
- Mail adapter access
|
||||
- Message and thread iteration
|
||||
- Header and body parsing
|
||||
- Normalization and model construction
|
||||
|
||||
No provider-specific logic or API semantics are permitted in this layer.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Iterator, Dict, Any
|
||||
|
||||
@@ -14,22 +29,43 @@ class MailIntakeReader:
|
||||
"""
|
||||
High-level read-only ingestion interface.
|
||||
|
||||
This is the primary entry point users should interact with.
|
||||
It orchestrates:
|
||||
- adapter calls
|
||||
- parsing
|
||||
- normalization
|
||||
- model construction
|
||||
This class is the **primary entry point** for consumers of the Mail
|
||||
Intake library.
|
||||
|
||||
No provider-specific logic exists here.
|
||||
It orchestrates the full ingestion pipeline:
|
||||
- Querying the adapter for message references
|
||||
- Fetching raw provider messages
|
||||
- Parsing and normalizing message data
|
||||
- Constructing domain models
|
||||
|
||||
This class is intentionally:
|
||||
- Provider-agnostic
|
||||
- Stateless beyond iteration scope
|
||||
- Read-only
|
||||
"""
|
||||
|
||||
def __init__(self, adapter: MailIntakeAdapter):
|
||||
"""
|
||||
Initialize the mail reader.
|
||||
|
||||
Args:
|
||||
adapter: Mail adapter implementation used to retrieve raw
|
||||
messages and threads from a mail provider.
|
||||
"""
|
||||
self._adapter = adapter
|
||||
|
||||
def iter_messages(self, query: str) -> Iterator[MailIntakeMessage]:
|
||||
"""
|
||||
Iterate over parsed messages matching a provider query.
|
||||
|
||||
Args:
|
||||
query: Provider-specific query string used to filter messages.
|
||||
|
||||
Yields:
|
||||
Fully parsed and normalized `MailIntakeMessage` instances.
|
||||
|
||||
Raises:
|
||||
MailIntakeParsingError: If a message cannot be parsed.
|
||||
"""
|
||||
for ref in self._adapter.iter_message_refs(query):
|
||||
raw = self._adapter.fetch_message(ref["message_id"])
|
||||
@@ -39,7 +75,17 @@ class MailIntakeReader:
|
||||
"""
|
||||
Iterate over threads constructed from messages matching a query.
|
||||
|
||||
Messages are grouped by thread_id and yielded as complete threads.
|
||||
Messages are grouped by `thread_id` and yielded as complete thread
|
||||
objects containing all associated messages.
|
||||
|
||||
Args:
|
||||
query: Provider-specific query string used to filter messages.
|
||||
|
||||
Returns:
|
||||
An iterator of `MailIntakeThread` instances.
|
||||
|
||||
Raises:
|
||||
MailIntakeParsingError: If a message cannot be parsed.
|
||||
"""
|
||||
threads: Dict[str, MailIntakeThread] = {}
|
||||
|
||||
@@ -61,7 +107,17 @@ class MailIntakeReader:
|
||||
|
||||
def _parse_message(self, raw_message: Dict[str, Any]) -> MailIntakeMessage:
|
||||
"""
|
||||
Parse a raw provider message into a MailIntakeMessage.
|
||||
Parse a raw provider message into a `MailIntakeMessage`.
|
||||
|
||||
Args:
|
||||
raw_message: Provider-native message payload.
|
||||
|
||||
Returns:
|
||||
A fully populated `MailIntakeMessage` instance.
|
||||
|
||||
Raises:
|
||||
MailIntakeParsingError: If the message payload is missing required
|
||||
fields or cannot be parsed.
|
||||
"""
|
||||
try:
|
||||
message_id = raw_message["id"]
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Domain models for Mail Intake.
|
||||
|
||||
This package defines the **canonical, provider-agnostic data models**
|
||||
used throughout the Mail Intake ingestion pipeline.
|
||||
|
||||
Models in this package:
|
||||
- Represent fully parsed and normalized mail data
|
||||
- Are safe to persist, serialize, and index
|
||||
- Contain no provider-specific payloads or API semantics
|
||||
- Serve as stable inputs for downstream processing and analysis
|
||||
|
||||
These models form the core internal data contract of the library.
|
||||
"""
|
||||
|
||||
from .message import MailIntakeMessage
|
||||
from .thread import MailIntakeThread
|
||||
|
||||
# Public API of the models package: the names exported by a
# star-import of this package.
__all__ = [
    "MailIntakeMessage",
    "MailIntakeThread",
]
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
"""
|
||||
Message domain models for Mail Intake.
|
||||
|
||||
This module defines the **canonical, provider-agnostic representation**
|
||||
of an individual email message as used internally by the Mail Intake
|
||||
ingestion pipeline.
|
||||
|
||||
Models in this module are safe to persist and must not contain any
|
||||
provider-specific fields or semantics.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict
|
||||
@@ -8,19 +19,37 @@ class MailIntakeMessage:
|
||||
"""
|
||||
Canonical internal representation of a single email message.
|
||||
|
||||
This model is provider-agnostic and safe to persist.
|
||||
No Gmail-specific fields should appear here.
|
||||
This model represents a fully parsed and normalized email message.
|
||||
It is intentionally provider-agnostic and suitable for persistence,
|
||||
indexing, and downstream processing.
|
||||
|
||||
No provider-specific identifiers, payloads, or API semantics
|
||||
should appear in this model.
|
||||
"""
|
||||
|
||||
message_id: str
|
||||
"""Provider-specific message identifier."""
|
||||
|
||||
thread_id: str
|
||||
"""Provider-specific thread identifier to which this message belongs."""
|
||||
|
||||
timestamp: datetime
|
||||
"""Message timestamp as a timezone-naive UTC datetime."""
|
||||
|
||||
from_email: str
|
||||
"""Sender email address."""
|
||||
|
||||
from_name: Optional[str]
|
||||
"""Optional human-readable sender name."""
|
||||
|
||||
subject: str
|
||||
"""Raw subject line of the message."""
|
||||
|
||||
body_text: str
|
||||
"""Extracted plain-text body content of the message."""
|
||||
|
||||
snippet: str
|
||||
"""Short provider-supplied preview snippet of the message."""
|
||||
|
||||
raw_headers: Dict[str, str]
|
||||
"""Normalized mapping of message headers (header name → value)."""
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
"""
|
||||
Thread domain models for Mail Intake.
|
||||
|
||||
This module defines the **canonical, provider-agnostic representation**
|
||||
of an email thread as used internally by the Mail Intake ingestion pipeline.
|
||||
|
||||
Threads group related messages and serve as the primary unit of reasoning
|
||||
for higher-level correspondence workflows.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import List, Set
|
||||
@@ -10,21 +20,40 @@ class MailIntakeThread:
|
||||
"""
|
||||
Canonical internal representation of an email thread.
|
||||
|
||||
Threads are the primary unit of reasoning for correspondence
|
||||
workflows (job applications, interviews, follow-ups, etc.).
|
||||
A thread groups multiple related messages under a single subject
|
||||
and participant set. It is designed to support reasoning over
|
||||
conversational context such as job applications, interviews,
|
||||
follow-ups, and ongoing discussions.
|
||||
|
||||
This model is provider-agnostic and safe to persist.
|
||||
"""
|
||||
|
||||
thread_id: str
|
||||
"""Provider-specific thread identifier."""
|
||||
|
||||
normalized_subject: str
|
||||
"""Normalized subject line used to group related messages."""
|
||||
|
||||
participants: Set[str] = field(default_factory=set)
|
||||
"""Set of unique participant email addresses observed in the thread."""
|
||||
|
||||
messages: List[MailIntakeMessage] = field(default_factory=list)
|
||||
"""Ordered list of messages belonging to this thread."""
|
||||
|
||||
last_activity_at: datetime | None = None
|
||||
"""Timestamp of the most recent message in the thread."""
|
||||
|
||||
def add_message(self, message: MailIntakeMessage) -> None:
|
||||
"""
|
||||
Add a message to the thread and update derived fields.
|
||||
|
||||
This method:
|
||||
- Appends the message to the thread
|
||||
- Tracks unique participants
|
||||
- Updates the last activity timestamp
|
||||
|
||||
Args:
|
||||
message: Parsed mail message to add to the thread.
|
||||
"""
|
||||
self.messages.append(message)
|
||||
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
Message parsing utilities for Mail Intake.
|
||||
|
||||
This package contains **provider-aware but adapter-agnostic parsing helpers**
|
||||
used to extract and normalize structured information from raw mail payloads.
|
||||
|
||||
Parsers in this package are responsible for:
|
||||
- Interpreting provider-native message structures
|
||||
- Extracting meaningful fields such as headers, body text, and subjects
|
||||
- Normalizing data into consistent internal representations
|
||||
|
||||
This package does not:
|
||||
- Perform network or IO operations
|
||||
- Contain provider API logic
|
||||
- Construct domain models directly
|
||||
|
||||
Parsing functions are designed to be composable and are orchestrated by the
|
||||
ingestion layer.
|
||||
"""
|
||||
|
||||
from .body import extract_body
|
||||
from .headers import parse_headers, extract_sender
|
||||
from .subject import normalize_subject
|
||||
|
||||
# Public API of the parsers package: the names exported by a
# star-import of this package.
__all__ = [
    "extract_body",
    "parse_headers",
    "extract_sender",
    "normalize_subject",
]
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
"""
|
||||
Message body extraction utilities for Mail Intake.
|
||||
|
||||
This module contains helper functions for extracting a best-effort
|
||||
plain-text body from provider-native message payloads.
|
||||
|
||||
The logic is intentionally tolerant of malformed or partial data and
|
||||
prefers human-readable text over fidelity to original formatting.
|
||||
"""
|
||||
|
||||
import base64
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
@@ -9,6 +19,18 @@ from mail_intake.exceptions import MailIntakeParsingError
|
||||
def _decode_base64(data: str) -> str:
|
||||
"""
|
||||
Decode Gmail URL-safe base64 payload into UTF-8 text.
|
||||
|
||||
Gmail message bodies are encoded using URL-safe base64, which may
|
||||
omit padding and use non-standard characters.
|
||||
|
||||
Args:
|
||||
data: URL-safe base64-encoded string.
|
||||
|
||||
Returns:
|
||||
Decoded UTF-8 text with replacement for invalid characters.
|
||||
|
||||
Raises:
|
||||
MailIntakeParsingError: If decoding fails.
|
||||
"""
|
||||
try:
|
||||
padded = data.replace("-", "+").replace("_", "/")
|
||||
@@ -21,6 +43,16 @@ def _decode_base64(data: str) -> str:
|
||||
def _extract_from_part(part: Dict[str, Any]) -> Optional[str]:
|
||||
"""
|
||||
Extract text content from a single MIME part.
|
||||
|
||||
Supports:
|
||||
- text/plain
|
||||
- text/html (converted to plain text)
|
||||
|
||||
Args:
|
||||
part: MIME part dictionary from a provider payload.
|
||||
|
||||
Returns:
|
||||
Extracted plain-text content, or None if unsupported or empty.
|
||||
"""
|
||||
mime_type = part.get("mimeType")
|
||||
body = part.get("body", {})
|
||||
@@ -49,7 +81,14 @@ def extract_body(payload: Dict[str, Any]) -> str:
|
||||
Priority:
|
||||
1. text/plain
|
||||
2. text/html (stripped to text)
|
||||
3. empty string (if nothing usable found)
|
||||
3. Single-part body
|
||||
4. empty string (if nothing usable found)
|
||||
|
||||
Args:
|
||||
payload: Provider-native message payload dictionary.
|
||||
|
||||
Returns:
|
||||
Extracted plain-text message body.
|
||||
"""
|
||||
if not payload:
|
||||
return ""
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
"""
|
||||
Message header parsing utilities for Mail Intake.
|
||||
|
||||
This module provides helper functions for normalizing and extracting
|
||||
useful information from provider-native message headers.
|
||||
|
||||
The functions here are intentionally simple and tolerant of malformed
|
||||
or incomplete header data.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
|
||||
@@ -5,18 +15,28 @@ def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:
|
||||
"""
|
||||
Convert a list of Gmail-style headers into a normalized dict.
|
||||
|
||||
Provider payloads (such as Gmail) typically represent headers as a list
|
||||
of name/value mappings. This function normalizes them into a
|
||||
case-insensitive dictionary keyed by lowercase header names.
|
||||
|
||||
Args:
|
||||
raw_headers: List of header dictionaries, each containing
|
||||
``name`` and ``value`` keys.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping lowercase header names to stripped values.
|
||||
|
||||
Example:
|
||||
Input:
|
||||
[
|
||||
{"name": "From", "value": "John Doe <john@example.com>"},
|
||||
{"name": "Subject", "value": "Re: Interview Update"},
|
||||
...
|
||||
]
|
||||
|
||||
Output:
|
||||
{
|
||||
"from": "...",
|
||||
"subject": "...",
|
||||
...
|
||||
"from": "John Doe <john@example.com>",
|
||||
"subject": "Re: Interview Update",
|
||||
}
|
||||
"""
|
||||
headers: Dict[str, str] = {}
|
||||
@@ -37,18 +57,27 @@ def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]:
|
||||
"""
|
||||
Extract sender email and optional display name from headers.
|
||||
|
||||
Returns:
|
||||
(email, name)
|
||||
This function parses the ``From`` header and attempts to extract:
|
||||
- Sender email address
|
||||
- Optional human-readable display name
|
||||
|
||||
If name cannot be determined, name will be None.
|
||||
Args:
|
||||
headers: Normalized header dictionary as returned by
|
||||
:func:`parse_headers`.
|
||||
|
||||
Returns:
|
||||
A tuple ``(email, name)`` where:
|
||||
- ``email`` is the sender email address
|
||||
- ``name`` is the display name, or ``None`` if unavailable
|
||||
|
||||
Examples:
|
||||
``"John Doe <john@example.com>"`` → ``("john@example.com", "John Doe")``
|
||||
``"john@example.com"`` → ``("john@example.com", None)``
|
||||
"""
|
||||
from_header = headers.get("from")
|
||||
if not from_header:
|
||||
return "", None
|
||||
|
||||
# Common forms:
|
||||
# Name <email@domain>
|
||||
# email@domain
|
||||
if "<" in from_header and ">" in from_header:
|
||||
name_part, email_part = from_header.split("<", 1)
|
||||
email = email_part.rstrip(">").strip()
|
||||
|
||||
@@ -1,7 +1,18 @@
|
||||
"""
|
||||
Subject line normalization utilities for Mail Intake.
|
||||
|
||||
This module provides helper functions for normalizing email subject lines
|
||||
to enable reliable thread-level comparison and grouping.
|
||||
|
||||
Normalization is intentionally conservative to avoid altering semantic
|
||||
meaning while removing common reply and forward prefixes.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Matches ONE leading "re", "fw" or "fwd" prefix (any letter case) followed
# by optional whitespace, a colon, and optional whitespace. The pattern is
# anchored at the start of the string, so each match removes one prefix.
_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)
"""Regular expression matching common reply/forward subject prefixes."""
|
||||
|
||||
|
||||
def normalize_subject(subject: str) -> str:
|
||||
@@ -9,11 +20,19 @@ def normalize_subject(subject: str) -> str:
|
||||
Normalize an email subject for thread-level comparison.
|
||||
|
||||
Operations:
|
||||
- Strip common prefixes (Re:, Fwd:, FW:)
|
||||
- Collapse whitespace
|
||||
- Preserve original casing (no lowercasing)
|
||||
- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``
|
||||
- Repeats prefix stripping to handle stacked prefixes
|
||||
- Collapses excessive whitespace
|
||||
- Preserves original casing (no lowercasing)
|
||||
|
||||
This function is intentionally conservative.
|
||||
This function is intentionally conservative and avoids aggressive
|
||||
transformations that could alter the semantic meaning of the subject.
|
||||
|
||||
Args:
|
||||
subject: Raw subject line from a message header.
|
||||
|
||||
Returns:
|
||||
Normalized subject string suitable for thread grouping.
|
||||
"""
|
||||
if not subject:
|
||||
return ""
|
||||
|
||||
159
manage_docs.py
Normal file
159
manage_docs.py
Normal file
@@ -0,0 +1,159 @@
|
||||
"""
|
||||
MkDocs documentation management CLI.
|
||||
|
||||
This script provides a proper CLI interface to:
|
||||
- Generate MkDocs Markdown files with mkdocstrings directives
|
||||
- Build the documentation site
|
||||
- Serve the documentation site locally
|
||||
|
||||
All operations are performed by calling MkDocs as a Python library
|
||||
(no shell command invocation).
|
||||
|
||||
Requirements:
|
||||
- mkdocs
|
||||
- mkdocs-material
|
||||
- mkdocstrings[python]
|
||||
|
||||
Usage:
|
||||
python manage_docs.py generate
|
||||
python manage_docs.py build
|
||||
python manage_docs.py serve
|
||||
|
||||
Optional flags:
|
||||
--docs-dir PATH Path to docs directory (default: ./docs)
|
||||
--package-root NAME Root Python package name (default: mail_intake)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from mkdocs.commands import build as mkdocs_build
|
||||
from mkdocs.commands import serve as mkdocs_serve
|
||||
from mkdocs.config import load_config
|
||||
|
||||
|
||||
# Directory that contains this script; every other path below is derived from it.
PROJECT_ROOT = Path(__file__).resolve().parent
|
||||
# Default value of the --docs-dir command-line option.
DEFAULT_DOCS_DIR = PROJECT_ROOT / "docs"
|
||||
# Default value of the --package-root command-line option.
DEFAULT_PACKAGE_ROOT = "mail_intake"
|
||||
# Location of the MkDocs configuration file used by the build and serve commands.
MKDOCS_YML = PROJECT_ROOT / "mkdocs.yml"
|
||||
|
||||
|
||||
def generate_docs_from_nav(
    project_root: Path,
    docs_root: Path,
    package_root: str,
) -> None:
    """
    Create and populate MkDocs Markdown files with mkdocstrings directives.

    Despite its name, this function does not read the ``nav`` section of
    ``mkdocs.yml``; it walks the package on disk. For every ``*.py`` file
    found under ``project_root / package_root`` it:

    - Mirrors the file's relative path under ``docs_root``
    - Creates any missing parent directories
    - Maps ``__init__.py`` to ``index.md`` of the corresponding directory
    - Overwrites the Markdown file with a title and a ``::: package.module``
      directive

    Examples:
        mail_intake/__init__.py          -> docs/mail_intake/index.md
        mail_intake/config.py            -> docs/mail_intake/config.md
        mail_intake/adapters/__init__.py -> docs/mail_intake/adapters/index.md
        mail_intake/adapters/base.py     -> docs/mail_intake/adapters/base.md

    Args:
        project_root: Directory that contains the package directory. Module
            paths are computed relative to this directory.
        docs_root: Directory under which the Markdown tree is written. It is
            created if it does not exist.
        package_root: Name of the package directory inside ``project_root``.

    Raises:
        FileNotFoundError: If ``project_root / package_root`` is not an
            existing directory.
    """
    package_dir = project_root / package_root
    # Require a directory: a regular file that happens to carry the package
    # name would pass a bare existence check even though it contains no
    # modules to document.
    if not package_dir.is_dir():
        raise FileNotFoundError(f"Package not found: {package_dir}")

    docs_root.mkdir(parents=True, exist_ok=True)

    # Sorted so that pages are written in a reproducible order.
    for py_file in sorted(package_dir.rglob("*.py")):
        rel = py_file.relative_to(project_root)

        if py_file.name == "__init__.py":
            # Package -> index.md inside the mirrored package directory.
            module_rel = rel.parent
            md_path = docs_root / module_rel / "index.md"
        else:
            # Regular module -> <module>.md next to its siblings.
            module_rel = rel.with_suffix("")
            md_path = docs_root / rel.with_suffix(".md")

        module_path = ".".join(module_rel.parts)
        title = module_rel.name.replace("_", " ").title()

        md_path.parent.mkdir(parents=True, exist_ok=True)
        md_path.write_text(
            f"# {title}\n\n::: {module_path}\n",
            encoding="utf-8",
        )
|
||||
|
||||
|
||||
def load_mkdocs_config():
    """
    Load the MkDocs configuration from the project's ``mkdocs.yml``.

    Returns:
        The configuration object returned by ``load_config`` for the file
        at ``MKDOCS_YML``.

    Raises:
        FileNotFoundError: If ``MKDOCS_YML`` does not exist.
    """
    if MKDOCS_YML.exists():
        return load_config(str(MKDOCS_YML))
    raise FileNotFoundError("mkdocs.yml not found at project root")
|
||||
|
||||
|
||||
def cmd_generate(args: argparse.Namespace) -> None:
    """
    Handle the ``generate`` subcommand.

    Args:
        args: Parsed command-line arguments; ``docs_dir`` and
            ``package_root`` are read from it.
    """
    options = {
        "project_root": PROJECT_ROOT,
        "docs_root": args.docs_dir,
        "package_root": args.package_root,
    }
    generate_docs_from_nav(**options)
|
||||
|
||||
|
||||
def cmd_build(_: argparse.Namespace) -> None:
    """
    Handle the ``build`` subcommand.

    Args:
        _: Parsed command-line arguments; not used by this command.
    """
    mkdocs_build.build(load_mkdocs_config())
|
||||
|
||||
|
||||
def cmd_serve(_: argparse.Namespace) -> None:
    """
    Handle the ``serve`` subcommand.

    Args:
        _: Parsed command-line arguments; not used by this command.
    """
    config_path = str(MKDOCS_YML)
    mkdocs_serve.serve(config_file=config_path)
|
||||
|
||||
def main() -> None:
    """
    Parse the command line and run the selected subcommand.

    ``--docs-dir`` and ``--package-root`` are registered on the top-level
    parser, while ``generate``, ``build`` and ``serve`` are registered as
    subcommands. Each subcommand stores its handler in ``func``, which is
    then called with the parsed namespace.
    """
    cli = argparse.ArgumentParser(
        prog="manage_docs.py",
        description="Manage MkDocs documentation for the project",
    )
    cli.add_argument(
        "--docs-dir",
        type=Path,
        default=DEFAULT_DOCS_DIR,
        help="Path to the docs directory",
    )
    cli.add_argument(
        "--package-root",
        default=DEFAULT_PACKAGE_ROOT,
        help="Root Python package name",
    )

    commands = cli.add_subparsers(dest="command", required=True)

    # (subcommand name, help text, handler) in registration order.
    command_table = (
        ("generate", "Generate Markdown files with mkdocstrings directives", cmd_generate),
        ("build", "Build the MkDocs site", cmd_build),
        ("serve", "Serve the MkDocs site locally", cmd_serve),
    )
    for name, summary, handler in command_table:
        commands.add_parser(name, help=summary).set_defaults(func=handler)

    parsed = cli.parse_args()
    parsed.func(parsed)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
66
mkdocs.yml
Normal file
66
mkdocs.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
site_name: Aetoskia Mail Intake
|
||||
site_description: Structured mail ingestion and correspondence parsing with provider adapters (Gmail-first)
|
||||
|
||||
theme:
|
||||
name: material
|
||||
palette:
|
||||
- scheme: slate
|
||||
primary: deep purple
|
||||
accent: cyan
|
||||
font:
|
||||
text: Inter
|
||||
code: JetBrains Mono
|
||||
features:
|
||||
- navigation.tabs
|
||||
- navigation.expand
|
||||
- navigation.top
|
||||
- navigation.instant
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
|
||||
plugins:
|
||||
- search
|
||||
- mkdocstrings:
|
||||
handlers:
|
||||
python:
|
||||
paths: ["."]
|
||||
options:
|
||||
docstring_style: google
|
||||
show_source: false
|
||||
show_signature_annotations: true
|
||||
separate_signature: true
|
||||
merge_init_into_class: true
|
||||
inherited_members: true
|
||||
annotations_path: brief
|
||||
show_root_heading: true
|
||||
group_by_category: true
|
||||
|
||||
nav:
|
||||
- Home: mail_intake/index.md
|
||||
|
||||
- Adapters:
|
||||
- Base Adapter: mail_intake/adapters/base.md
|
||||
- Gmail Adapter: mail_intake/adapters/gmail.md
|
||||
|
||||
- Auth:
|
||||
- Base Auth: mail_intake/auth/base.md
|
||||
- Google Auth: mail_intake/auth/google.md
|
||||
|
||||
- Credentials Store:
|
||||
- Store: mail_intake/credentials/store.md
|
||||
- Pickle: mail_intake/credentials/pickle.md
|
||||
- Redis: mail_intake/credentials/redis.md
|
||||
|
||||
- Mail Reader: mail_intake/ingestion/reader.md
|
||||
|
||||
- Models:
|
||||
- Message: mail_intake/models/message.md
|
||||
- Thread: mail_intake/models/thread.md
|
||||
|
||||
- Parsers:
|
||||
- Body: mail_intake/parsers/body.md
|
||||
- Headers: mail_intake/parsers/headers.md
|
||||
- Subject: mail_intake/parsers/subject.md
|
||||
|
||||
- Config: mail_intake/config.md
|
||||
- Exceptions: mail_intake/exceptions.md
|
||||
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "mail-intake"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
description = "Structured mail ingestion and correspondence parsing with provider adapters (Gmail-first)."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
@@ -77,11 +77,7 @@ Versions = "https://git.aetoskia.com/aetos/mail-intake/tags"
|
||||
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = { "" = "src" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
include = ["mail_intake*"]
|
||||
packages = { find = { include = ["mail_intake*"] } }
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
beautifulsoup4==4.12.0
|
||||
google-api-python-client==2.187.0
|
||||
google-auth-oauthlib==1.2.3
|
||||
types-beautifulsoup4
|
||||
|
||||
# Test Packages
|
||||
pytest==7.4.0
|
||||
pytest-asyncio==0.21.0
|
||||
pytest-cov==4.1.0
|
||||
|
||||
types-beautifulsoup4
|
||||
|
||||
# Optional, useful locally
|
||||
ipython
|
||||
# Doc Packages
|
||||
mkdocs==1.6.1
|
||||
mkdocs-material==9.6.23
|
||||
neoteroi-mkdocs==1.1.3
|
||||
pymdown-extensions==10.16.1
|
||||
mkdocstrings==1.0.0
|
||||
mkdocstrings-python==2.0.1
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from mail_intake.models.message import MailIntakeMessage
|
||||
from mail_intake.models.thread import MailIntakeThread
|
||||
from mail_intake.models import MailIntakeMessage
|
||||
from mail_intake.models import MailIntakeThread
|
||||
|
||||
|
||||
def test_message_is_immutable():
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import base64
|
||||
|
||||
from mail_intake.parsers.subject import normalize_subject
|
||||
from mail_intake.parsers.headers import parse_headers, extract_sender
|
||||
from mail_intake.parsers.body import extract_body
|
||||
from mail_intake.parsers import normalize_subject
|
||||
from mail_intake.parsers import parse_headers, extract_sender
|
||||
from mail_intake.parsers import extract_body
|
||||
|
||||
|
||||
def _b64(text: str) -> str:
|
||||
|
||||
Reference in New Issue
Block a user