diff --git a/mail_intake/__init__.py b/mail_intake/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/adapters/__init__.py b/mail_intake/adapters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/adapters/base.py b/mail_intake/adapters/base.py
new file mode 100644
index 0000000..e26e17d
--- /dev/null
+++ b/mail_intake/adapters/base.py
@@ -0,0 +1,48 @@
+from abc import ABC, abstractmethod
+from typing import Iterator, Dict, Any
+
+
+class MailIntakeAdapter(ABC):
+    """
+    Base adapter interface for mail providers.
+
+    This interface defines the minimal contract required for
+    read-only mail ingestion. No provider-specific concepts
+    should leak beyond implementations of this class.
+    """
+
+    @abstractmethod
+    def iter_message_refs(self, query: str) -> Iterator[Dict[str, str]]:
+        """
+        Iterate over lightweight message references.
+
+        Must yield dictionaries containing at least:
+        - message_id
+        - thread_id
+
+        Example yield:
+        {
+            "message_id": "...",
+            "thread_id": "..."
+        }
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def fetch_message(self, message_id: str) -> Dict[str, Any]:
+        """
+        Fetch a full raw message by message_id.
+
+        Returns the provider-native message payload
+        (e.g., Gmail message JSON).
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def fetch_thread(self, thread_id: str) -> Dict[str, Any]:
+        """
+        Fetch a full raw thread by thread_id.
+
+        Returns the provider-native thread payload.
+        """
+        raise NotImplementedError
diff --git a/mail_intake/adapters/gmail.py b/mail_intake/adapters/gmail.py
new file mode 100644
index 0000000..d4c52dc
--- /dev/null
+++ b/mail_intake/adapters/gmail.py
@@ -0,0 +1,105 @@
+from typing import Iterator, Dict, Any
+
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
+from mail_intake.adapters.base import MailIntakeAdapter
+from mail_intake.exceptions import MailIntakeAdapterError
+from mail_intake.auth.base import MailIntakeAuthProvider
+
+
+class MailIntakeGmailAdapter(MailIntakeAdapter):
+    """
+    Gmail read-only adapter.
+
+    This class is the ONLY place where:
+    - googleapiclient is imported
+    - Gmail REST semantics are known
+    - .execute() is called
+
+    It must remain thin and dumb by design.
+    """
+
+    def __init__(
+        self,
+        auth_provider: MailIntakeAuthProvider,
+        user_id: str = "me",
+    ):
+        self._auth_provider = auth_provider
+        self._user_id = user_id
+        self._service = None
+
+    @property
+    def service(self):
+        if self._service is None:
+            try:
+                creds = self._auth_provider.get_credentials()
+                self._service = build("gmail", "v1", credentials=creds)
+            except Exception as exc:
+                raise MailIntakeAdapterError(
+                    "Failed to initialize Gmail service"
+                ) from exc
+        return self._service
+
+    def iter_message_refs(self, query: str) -> Iterator[Dict[str, str]]:
+        """
+        Iterate over message references matching the query.
+
+        Yields:
+        {
+            "message_id": "...",
+            "thread_id": "..."
+        }
+        """
+        try:
+            request = (
+                self.service.users()
+                .messages()
+                .list(userId=self._user_id, q=query)
+            )
+
+            while request is not None:
+                response = request.execute()
+
+                for msg in response.get("messages", []):
+                    yield {
+                        "message_id": msg["id"],
+                        "thread_id": msg["threadId"],
+                    }
+
+                request = (
+                    self.service.users()
+                    .messages()
+                    .list_next(request, response)
+                )
+
+        except HttpError as exc:
+            raise MailIntakeAdapterError(
+                "Gmail API error while listing messages"
+            ) from exc
+
+    def fetch_message(self, message_id: str) -> Dict[str, Any]:
+        try:
+            return (
+                self.service.users()
+                .messages()
+                .get(userId=self._user_id, id=message_id)
+                .execute()
+            )
+        except HttpError as exc:
+            raise MailIntakeAdapterError(
+                f"Gmail API error while fetching message {message_id}"
+            ) from exc
+
+    def fetch_thread(self, thread_id: str) -> Dict[str, Any]:
+        try:
+            return (
+                self.service.users()
+                .threads()
+                .get(userId=self._user_id, id=thread_id)
+                .execute()
+            )
+        except HttpError as exc:
+            raise MailIntakeAdapterError(
+                f"Gmail API error while fetching thread {thread_id}"
+            ) from exc
diff --git a/mail_intake/auth/__init__.py b/mail_intake/auth/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/auth/base.py b/mail_intake/auth/base.py
new file mode 100644
index 0000000..526ff36
--- /dev/null
+++ b/mail_intake/auth/base.py
@@ -0,0 +1,20 @@
+from abc import ABC, abstractmethod
+
+
+class MailIntakeAuthProvider(ABC):
+    """
+    Abstract authentication provider.
+
+    Mail adapters depend on this interface, not on concrete
+    OAuth or credential implementations.
+    """
+
+    @abstractmethod
+    def get_credentials(self):
+        """
+        Return provider-specific credentials object.
+
+        This method is synchronous by design and must either
+        return valid credentials or raise MailIntakeAuthError.
+        """
+        raise NotImplementedError
diff --git a/mail_intake/auth/google.py b/mail_intake/auth/google.py
new file mode 100644
index 0000000..ec9a4f1
--- /dev/null
+++ b/mail_intake/auth/google.py
@@ -0,0 +1,93 @@
+import os
+import pickle
+from typing import Sequence
+
+import google.auth.exceptions
+from google.auth.transport.requests import Request
+from google_auth_oauthlib.flow import InstalledAppFlow
+
+from mail_intake.auth.base import MailIntakeAuthProvider
+from mail_intake.exceptions import MailIntakeAuthError
+
+
+class MailIntakeGoogleAuth(MailIntakeAuthProvider):
+    """
+    Google OAuth provider for Gmail access.
+
+    Responsibilities:
+    - Load cached credentials from disk
+    - Refresh expired tokens when possible
+    - Trigger interactive login only when strictly required
+
+    This class is synchronous and intentionally state-light.
+    """
+
+    def __init__(
+        self,
+        credentials_path: str,
+        token_path: str,
+        scopes: Sequence[str],
+    ):
+        self.credentials_path = credentials_path
+        self.token_path = token_path
+        self.scopes = list(scopes)
+
+    def get_credentials(self):
+        """
+        Return Google credentials, refreshing or re-authorizing as needed.
+
+        Raises MailIntakeAuthError if the client secrets file is missing,
+        the OAuth flow fails, or the token file cannot be written.
+        """
+        creds = None
+
+        # Attempt to load cached credentials.
+        # NOTE: pickle executes code on load; token_path must only ever
+        # point at a file written by this class, never at untrusted data.
+        if os.path.exists(self.token_path):
+            try:
+                with open(self.token_path, "rb") as fh:
+                    creds = pickle.load(fh)
+            except Exception:
+                creds = None
+
+        # Validate / refresh credentials
+        if not creds or not creds.valid:
+            if creds and creds.expired and creds.refresh_token:
+                try:
+                    creds.refresh(Request())
+                except google.auth.exceptions.RefreshError:
+                    creds = None
+            else:
+                # Invalid credentials that cannot be refreshed are
+                # unusable; discard them so the login below runs.
+                creds = None
+
+            # Interactive login if refresh failed or creds missing
+            if not creds:
+                if not os.path.exists(self.credentials_path):
+                    raise MailIntakeAuthError(
+                        f"Google credentials file not found: {self.credentials_path}"
+                    )
+
+                try:
+                    flow = InstalledAppFlow.from_client_secrets_file(
+                        self.credentials_path,
+                        self.scopes,
+                    )
+                    creds = flow.run_local_server(port=0)
+                except Exception as exc:
+                    raise MailIntakeAuthError(
+                        "Failed to complete Google OAuth flow"
+                    ) from exc
+
+            # Persist refreshed / new credentials
+            try:
+                with open(self.token_path, "wb") as fh:
+                    pickle.dump(creds, fh)
+            except Exception as exc:
+                raise MailIntakeAuthError(
+                    f"Failed to write token file: {self.token_path}"
+                ) from exc
+
+        return creds
diff --git a/mail_intake/config.py b/mail_intake/config.py
new file mode 100644
index 0000000..afd63c5
--- /dev/null
+++ b/mail_intake/config.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class MailIntakeConfig:
+    """
+    Global configuration for mail-intake.
+
+    This configuration is intentionally explicit and immutable.
+    No implicit environment reads or global state.
+    """
+
+    provider: str = "gmail"
+    user_id: str = "me"
+    readonly: bool = True
+
+    # Provider-specific paths (optional at this layer)
+    credentials_path: Optional[str] = None
+    token_path: Optional[str] = None
diff --git a/mail_intake/exceptions.py b/mail_intake/exceptions.py
new file mode 100644
index 0000000..43043f4
--- /dev/null
+++ b/mail_intake/exceptions.py
@@ -0,0 +1,19 @@
+class MailIntakeError(Exception):
+    """
+    Base exception for all mail-intake errors.
+
+    Users of the library should catch this type (or subclasses)
+    instead of provider-specific or third-party exceptions.
+    """
+
+
+class MailIntakeAuthError(MailIntakeError):
+    """Authentication and credential-related failures."""
+
+
+class MailIntakeAdapterError(MailIntakeError):
+    """Errors raised by mail provider adapters."""
+
+
+class MailIntakeParsingError(MailIntakeError):
+    """Errors encountered while parsing message content."""
diff --git a/mail_intake/ingestion/__init__.py b/mail_intake/ingestion/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/ingestion/reader.py b/mail_intake/ingestion/reader.py
new file mode 100644
index 0000000..186d296
--- /dev/null
+++ b/mail_intake/ingestion/reader.py
@@ -0,0 +1,101 @@
+from datetime import datetime
+from typing import Iterator, Dict, Any
+
+from mail_intake.adapters.base import MailIntakeAdapter
+from mail_intake.models.message import MailIntakeMessage
+from mail_intake.models.thread import MailIntakeThread
+from mail_intake.parsers.headers import parse_headers, extract_sender
+from mail_intake.parsers.body import extract_body
+from mail_intake.parsers.subject import normalize_subject
+from mail_intake.exceptions import MailIntakeParsingError
+
+
+class MailIntakeReader:
+    """
+    High-level read-only ingestion interface.
+
+    This is the primary entry point users should interact with.
+    It orchestrates:
+    - adapter calls
+    - parsing
+    - normalization
+    - model construction
+
+    No provider-specific logic exists here.
+    """
+
+    def __init__(self, adapter: MailIntakeAdapter):
+        self._adapter = adapter
+
+    def iter_messages(self, query: str) -> Iterator[MailIntakeMessage]:
+        """
+        Iterate over parsed messages matching a provider query.
+        """
+        for ref in self._adapter.iter_message_refs(query):
+            raw = self._adapter.fetch_message(ref["message_id"])
+            yield self._parse_message(raw)
+
+    def iter_threads(self, query: str) -> Iterator[MailIntakeThread]:
+        """
+        Iterate over threads constructed from messages matching a query.
+
+        Messages are grouped by thread_id and yielded as complete threads.
+        """
+        threads: Dict[str, MailIntakeThread] = {}
+
+        for ref in self._adapter.iter_message_refs(query):
+            raw = self._adapter.fetch_message(ref["message_id"])
+            message = self._parse_message(raw)
+
+            thread = threads.get(message.thread_id)
+            if thread is None:
+                thread = MailIntakeThread(
+                    thread_id=message.thread_id,
+                    normalized_subject=normalize_subject(message.subject),
+                )
+                threads[message.thread_id] = thread
+
+            thread.add_message(message)
+
+        return iter(threads.values())
+
+    def _parse_message(self, raw_message: Dict[str, Any]) -> MailIntakeMessage:
+        """
+        Parse a raw provider message into a MailIntakeMessage.
+        """
+        try:
+            message_id = raw_message["id"]
+            thread_id = raw_message["threadId"]
+
+            # Gmail internalDate is milliseconds since epoch
+            # NOTE(review): fromtimestamp() without tz yields naive local
+            # time; confirm callers do not compare it against UTC values.
+            timestamp_ms = int(raw_message.get("internalDate", 0))
+            timestamp = datetime.fromtimestamp(timestamp_ms / 1000)
+
+            payload = raw_message.get("payload", {})
+            raw_headers_list = payload.get("headers", [])
+
+            headers = parse_headers(raw_headers_list)
+            from_email, from_name = extract_sender(headers)
+
+            subject = headers.get("subject", "")
+            body_text = extract_body(payload)
+            snippet = raw_message.get("snippet", "")
+
+            return MailIntakeMessage(
+                message_id=message_id,
+                thread_id=thread_id,
+                timestamp=timestamp,
+                from_email=from_email,
+                from_name=from_name,
+                subject=subject,
+                body_text=body_text,
+                snippet=snippet,
+                raw_headers=headers,
+            )
+
+        except Exception as exc:
+            raise MailIntakeParsingError(
+                f"Failed to parse message {raw_message.get('id')}"
+            ) from exc
diff --git a/mail_intake/models/__init__.py b/mail_intake/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/models/message.py b/mail_intake/models/message.py
new file mode 100644
index 0000000..2696d75
--- /dev/null
+++ b/mail_intake/models/message.py
@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, Dict
+
+
+@dataclass(frozen=True)
+class MailIntakeMessage:
+    """
+    Canonical internal representation of a single email message.
+
+    This model is provider-agnostic and safe to persist.
+    No Gmail-specific fields should appear here.
+    """
+
+    message_id: str
+    thread_id: str
+    timestamp: datetime
+
+    from_email: str
+    from_name: Optional[str]
+
+    subject: str
+    body_text: str
+    snippet: str
+
+    raw_headers: Dict[str, str]
diff --git a/mail_intake/models/thread.py b/mail_intake/models/thread.py
new file mode 100644
index 0000000..ddcf757
--- /dev/null
+++ b/mail_intake/models/thread.py
@@ -0,0 +1,35 @@
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import List, Set
+
+from mail_intake.models.message import MailIntakeMessage
+
+
+@dataclass
+class MailIntakeThread:
+    """
+    Canonical internal representation of an email thread.
+
+    Threads are the primary unit of reasoning for correspondence
+    workflows (job applications, interviews, follow-ups, etc.).
+    """
+
+    thread_id: str
+    normalized_subject: str
+
+    participants: Set[str] = field(default_factory=set)
+    messages: List[MailIntakeMessage] = field(default_factory=list)
+
+    last_activity_at: datetime | None = None
+
+    def add_message(self, message: MailIntakeMessage) -> None:
+        """
+        Add a message to the thread and update derived fields.
+        """
+        self.messages.append(message)
+
+        if message.from_email:
+            self.participants.add(message.from_email)
+
+        if self.last_activity_at is None or message.timestamp > self.last_activity_at:
+            self.last_activity_at = message.timestamp
diff --git a/mail_intake/parsers/__init__.py b/mail_intake/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mail_intake/parsers/body.py b/mail_intake/parsers/body.py
new file mode 100644
index 0000000..82e14de
--- /dev/null
+++ b/mail_intake/parsers/body.py
@@ -0,0 +1,103 @@
+import base64
+from typing import Dict, Any, Optional
+
+from bs4 import BeautifulSoup
+
+from mail_intake.exceptions import MailIntakeParsingError
+
+
+def _decode_base64(data: str) -> str:
+    """
+    Decode Gmail URL-safe base64 payload into UTF-8 text.
+
+    Missing trailing "=" padding is restored first, because the
+    base64 decoder raises on incorrectly padded input. Bytes that
+    are not valid UTF-8 are replaced instead of raising.
+
+    Raises MailIntakeParsingError if the data cannot be decoded.
+    """
+    try:
+        padded = data + "=" * (-len(data) % 4)
+        decoded = base64.urlsafe_b64decode(padded)
+        return decoded.decode("utf-8", errors="replace")
+    except Exception as exc:
+        raise MailIntakeParsingError("Failed to decode message body") from exc
+
+
+def _extract_from_part(part: Dict[str, Any]) -> Optional[str]:
+    """
+    Extract text content from a single MIME part.
+
+    Returns None when the part has no inline data or is neither
+    text/plain nor text/html.
+    """
+    mime_type = part.get("mimeType")
+    body = part.get("body", {})
+    data = body.get("data")
+
+    if not data:
+        return None
+
+    text = _decode_base64(data)
+
+    if mime_type == "text/plain":
+        return text
+
+    if mime_type == "text/html":
+        soup = BeautifulSoup(text, "html.parser")
+        return soup.get_text(separator="\n", strip=True)
+
+    return None
+
+
+def _collect_bodies(parts: Any, found: Dict[str, str]) -> None:
+    """
+    Walk MIME parts depth-first and record, per MIME type, the first
+    non-empty text/plain and text/html content into ``found``.
+    """
+    for part in parts or []:
+        content = _extract_from_part(part)
+        if content:
+            # _extract_from_part only yields content for text/plain
+            # and text/html, so the MIME type is a safe key.
+            found.setdefault(part.get("mimeType"), content)
+
+        # Containers (e.g. multipart/alternative) keep their text in
+        # nested parts rather than in their own body.
+        _collect_bodies(part.get("parts"), found)
+
+
+def extract_body(payload: Dict[str, Any]) -> str:
+    """
+    Extract the best-effort message body from a Gmail payload.
+
+    Priority:
+    1. text/plain
+    2. text/html (stripped to text)
+    3. empty string (if nothing usable found)
+
+    Nested multipart containers are searched recursively.
+    """
+    if not payload:
+        return ""
+
+    # Multipart message
+    if "parts" in payload:
+        found: Dict[str, str] = {}
+        _collect_bodies(payload.get("parts"), found)
+
+        for mime_type in ("text/plain", "text/html"):
+            if mime_type in found:
+                return found[mime_type]
+
+    # Single-part message
+    body = payload.get("body", {})
+    data = body.get("data")
+    if not data:
+        return ""
+
+    if payload.get("mimeType") == "text/html":
+        # Strip markup exactly as multipart HTML parts are handled.
+        return _extract_from_part(payload) or ""
+
+    return _decode_base64(data)
diff --git a/mail_intake/parsers/headers.py b/mail_intake/parsers/headers.py
new file mode 100644
index 0000000..7c0c929
--- /dev/null
+++ b/mail_intake/parsers/headers.py
@@ -0,0 +1,59 @@
+from typing import Dict, List, Tuple, Optional
+
+
+def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:
+    """
+    Convert a list of Gmail-style headers into a normalized dict.
+
+    Input:
+    [
+        {"name": "From", "value": "John Doe <john@example.com>"},
+        {"name": "Subject", "value": "Re: Interview Update"},
+        ...
+    ]
+
+    Output:
+    {
+        "from": "...",
+        "subject": "...",
+        ...
+    }
+    """
+    headers: Dict[str, str] = {}
+
+    for header in raw_headers or []:
+        name = header.get("name")
+        value = header.get("value")
+
+        if not name or value is None:
+            continue
+
+        headers[name.lower()] = value.strip()
+
+    return headers
+
+
+def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]:
+    """
+    Extract sender email and optional display name from headers.
+
+    Returns:
+    (email, name)
+
+    If name cannot be determined, name will be None.
+    """
+    from_header = headers.get("from")
+    if not from_header:
+        return "", None
+
+    # Common forms:
+    #   Name <email@domain>
+    #   email@domain
+    if "<" in from_header and ">" in from_header:
+        name_part, email_part = from_header.split("<", 1)
+        # Keep only the text before the closing ">" so trailing content is dropped
+        email = email_part.split(">", 1)[0].strip()
+        name = name_part.strip().strip('"') or None
+        return email, name
+
+    return from_header.strip(), None
diff --git a/mail_intake/parsers/subject.py b/mail_intake/parsers/subject.py
new file mode 100644
index 0000000..df5041d
--- /dev/null
+++ b/mail_intake/parsers/subject.py
@@ -0,0 +1,33 @@
+import re
+
+
+_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)
+
+
+def normalize_subject(subject: str) -> str:
+    """
+    Normalize an email subject for thread-level comparison.
+
+    Operations:
+    - Strip common prefixes (Re:, Fwd:, FW:)
+    - Collapse whitespace
+    - Preserve original casing (no lowercasing)
+
+    This function is intentionally conservative.
+    """
+    if not subject:
+        return ""
+
+    normalized = subject.strip()
+
+    # Strip prefixes repeatedly (e.g., Re: Fwd: Re:)
+    while True:
+        new_value = _PREFIX_RE.sub("", normalized)
+        if new_value == normalized:
+            break
+        normalized = new_value.strip()
+
+    # Normalize whitespace
+    normalized = " ".join(normalized.split())
+
+    return normalized
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f94d90d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,92 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+
+[project]
+name = "mail-intake"
+version = "0.0.1"
+description = "Structured mail ingestion and correspondence parsing with provider adapters (Gmail-first)."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "MIT" }
+
+authors = [
+    { name = "Aetos Skia", email = "dev@aetoskia.com" }
+]
+maintainers = [
+    { name = "Aetos Skia", email = "dev@aetoskia.com" }
+]
+
+
+keywords = [
+    "email",
+    "gmail",
+    "mail",
+    "ingestion",
+    "automation",
+    "job-search",
+    "correspondence",
+]
+
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Communications :: Email",
+    "Topic :: Software Development :: Libraries",
+]
+
+dependencies = [
+    # Gmail API stack
+    "google-api-python-client>=2.120.0",
+    "google-auth>=2.28.0",
+    "google-auth-oauthlib>=1.2.0",
+
+    # Parsing
+    "beautifulsoup4>=4.12.0",
+    "lxml>=5.1.0",
+]
+
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.1.0",
+    "ruff>=0.3.0",
+    "mypy>=1.8.0",
+    "types-beautifulsoup4",
+]
+
+docs = [
+    "mkdocs>=1.5.0",
+    "mkdocs-material>=9.5.0",
+]
+
+
+[project.urls]
+Homepage = "https://git.aetoskia.com/aetos/mail-intake"
+Documentation = "https://git.aetoskia.com/aetos/mail-intake#readme"
+Repository = "https://git.aetoskia.com/aetos/mail-intake.git"
+Issues = "https://git.aetoskia.com/aetos/mail-intake/issues"
+Versions = "https://git.aetoskia.com/aetos/mail-intake/tags"
+
+
+[tool.setuptools.packages.find]
+# Flat layout: the package lives at the repository root, not under src/.
+where = ["."]
+include = ["mail_intake*"]
+
+
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+
+[tool.mypy]
+python_version = "3.10"
+strict = true
+ignore_missing_imports = true
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ea791d9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+beautifulsoup4==4.12.0
+
+pytest==7.4.0
+pytest-asyncio==0.21.0
+pytest-cov==4.1.0
+
+types-beautifulsoup4
+
+# Optional, useful locally
+ipython
diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py
new file mode 100644
index 0000000..36be62a
--- /dev/null
+++ b/tests/unit/test_models.py
@@ -0,0 +1,95 @@
+from dataclasses import FrozenInstanceError
+from datetime import datetime, timedelta
+
+from mail_intake.models.message import MailIntakeMessage
+from mail_intake.models.thread import MailIntakeThread
+
+
+def test_message_is_immutable():
+    msg = MailIntakeMessage(
+        message_id="m1",
+        thread_id="t1",
+        timestamp=datetime.utcnow(),
+        from_email="alice@example.com",
+        from_name="Alice",
+        subject="Hello",
+        body_text="Body",
+        snippet="Snippet",
+        raw_headers={"from": "Alice <alice@example.com>"},
+    )
+
+    # Catch only FrozenInstanceError: a broad "except Exception" would
+    # also swallow the failing assertion and let the test pass vacuously.
+    try:
+        msg.subject = "Changed"
+    except FrozenInstanceError:
+        return
+
+    raise AssertionError("Message should be immutable")
+
+
+def test_thread_add_message_updates_participants_and_timestamp():
+    t0 = datetime.utcnow()
+    t1 = t0 + timedelta(minutes=5)
+
+    msg1 = MailIntakeMessage(
+        message_id="m1",
+        thread_id="t1",
+        timestamp=t0,
+        from_email="alice@example.com",
+        from_name="Alice",
+        subject="Hello",
+        body_text="Body",
+        snippet="Snippet",
+        raw_headers={},
+    )
+
+    msg2 = MailIntakeMessage(
+        message_id="m2",
+        thread_id="t1",
+        timestamp=t1,
+        from_email="bob@example.com",
+        from_name="Bob",
+        subject="Re: Hello",
+        body_text="Reply",
+        snippet="Reply",
+        raw_headers={},
+    )
+
+    thread = MailIntakeThread(
+        thread_id="t1",
+        normalized_subject="Hello",
+    )
+
+    thread.add_message(msg1)
+    assert thread.last_activity_at == t0
+    assert "alice@example.com" in thread.participants
+
+    thread.add_message(msg2)
+    assert thread.last_activity_at == t1
+    assert "bob@example.com" in thread.participants
+    assert len(thread.messages) == 2
+
+
+def test_thread_handles_messages_without_sender():
+    msg = MailIntakeMessage(
+        message_id="m1",
+        thread_id="t1",
+        timestamp=datetime.utcnow(),
+        from_email="",
+        from_name=None,
+        subject="System Message",
+        body_text="Body",
+        snippet="Snippet",
+        raw_headers={},
+    )
+
+    thread = MailIntakeThread(
+        thread_id="t1",
+        normalized_subject="System Message",
+    )
+
+    thread.add_message(msg)
+
+    assert len(thread.participants) == 0
+    assert thread.last_activity_at is not None
diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py
new file mode 100644
index 0000000..6c75491
--- /dev/null
+++ b/tests/unit/test_parsers.py
@@ -0,0 +1,152 @@
+import base64
+
+from mail_intake.parsers.subject import normalize_subject
+from mail_intake.parsers.headers import parse_headers, extract_sender
+from mail_intake.parsers.body import extract_body
+
+
+def _b64(text: str) -> str:
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")
+
+
+# --------------------
+# Subject parsing
+# --------------------
+
+def test_normalize_subject_strips_common_prefixes():
+    assert normalize_subject("Re: Interview Update") == "Interview Update"
+    assert normalize_subject("Fwd: Re: Offer Letter") == "Offer Letter"
+    assert normalize_subject("FW: Re: FW: Status") == "Status"
+
+
+def test_normalize_subject_preserves_content_and_case():
+    subject = "Interview Update – Backend Role"
+    assert normalize_subject(subject) == subject
+
+
+def test_normalize_subject_empty_and_none_safe():
+    assert normalize_subject("") == ""
+
+
+# --------------------
+# Header parsing
+# --------------------
+
+def test_parse_headers_lowercases_keys():
+    raw_headers = [
+        {"name": "From", "value": "Alice <alice@example.com>"},
+        {"name": "Subject", "value": "Hello"},
+    ]
+
+    headers = parse_headers(raw_headers)
+
+    assert headers["from"] == "Alice <alice@example.com>"
+    assert headers["subject"] == "Hello"
+
+
+def test_parse_headers_ignores_invalid_entries():
+    raw_headers = [
+        {"name": "From", "value": "Bob <bob@example.com>"},
+        {"name": None, "value": "X"},
+        {"name": "X-Test", "value": None},
+    ]
+
+    headers = parse_headers(raw_headers)
+    assert "from" in headers
+    assert "x-test" not in headers
+
+
+def test_extract_sender_with_name_and_email():
+    headers = {"from": "Alice Smith <alice@example.com>"}
+    email, name = extract_sender(headers)
+
+    assert email == "alice@example.com"
+    assert name == "Alice Smith"
+
+
+def test_extract_sender_email_only():
+    headers = {"from": "bob@example.com"}
+    email, name = extract_sender(headers)
+
+    assert email == "bob@example.com"
+    assert name is None
+
+
+def test_extract_sender_missing_from():
+    email, name = extract_sender({})
+    assert email == ""
+    assert name is None
+
+
+# --------------------
+# Body parsing
+# --------------------
+
+def test_extract_body_prefers_text_plain():
+    payload = {
+        "parts": [
+            {
+                "mimeType": "text/html",
+                "body": {"data": _b64("<html><body><p>Hello HTML</p></body></html>")},
+            },
+            {
+                "mimeType": "text/plain",
+                "body": {"data": _b64("Hello TEXT")},
+            },
+        ]
+    }
+
+    body = extract_body(payload)
+    assert body == "Hello TEXT"
+
+
+def test_extract_body_falls_back_to_html():
+    payload = {
+        "parts": [
+            {
+                "mimeType": "text/html",
+                "body": {"data": _b64("<html><body><p>Hello <b>World</b></p></body></html>")},
+            }
+        ]
+    }
+
+    body = extract_body(payload)
+    assert "Hello" in body
+    assert "World" in body
+
+
+def test_extract_body_single_part():
+    payload = {
+        "body": {"data": _b64("Single part body")}
+    }
+
+    body = extract_body(payload)
+    assert body == "Single part body"
+
+
+def test_extract_body_empty_payload():
+    assert extract_body({}) == ""
+
+
+def test_extract_body_decodes_unpadded_urlsafe_base64():
+    data = base64.urlsafe_b64encode(b"Hello").decode("utf-8").rstrip("=")
+
+    assert extract_body({"body": {"data": data}}) == "Hello"
+
+
+def test_extract_body_searches_nested_multipart():
+    payload = {
+        "parts": [
+            {
+                "mimeType": "multipart/alternative",
+                "parts": [
+                    {
+                        "mimeType": "text/plain",
+                        "body": {"data": _b64("Nested TEXT")},
+                    },
+                ],
+            }
+        ]
+    }
+
+    assert extract_body(payload) == "Nested TEXT"