Updated docstrings and added README.md

This commit is contained in:
2026-03-08 17:59:53 +05:30
parent 0453fdd88a
commit c541577788
46 changed files with 863 additions and 681 deletions

View File

@@ -1,4 +1,6 @@
"""
# Summary
Message body extraction utilities for Mail Intake.
This module contains helper functions for extracting a best-effort
@@ -24,13 +26,16 @@ def _decode_base64(data: str) -> str:
omit padding and use non-standard characters.
Args:
data: URL-safe base64-encoded string.
data (str):
URL-safe base64-encoded string.
Returns:
Decoded UTF-8 text with replacement for invalid characters.
str:
Decoded UTF-8 text with replacement for invalid characters.
Raises:
MailIntakeParsingError: If decoding fails.
MailIntakeParsingError:
If decoding fails.
"""
try:
padded = data.replace("-", "+").replace("_", "/")
@@ -45,14 +50,17 @@ def _extract_from_part(part: Dict[str, Any]) -> Optional[str]:
Extract text content from a single MIME part.
Supports:
- text/plain
- text/html (converted to plain text)
- `text/plain`
- `text/html` (converted to plain text)
Args:
part: MIME part dictionary from a provider payload.
part (Dict[str, Any]):
MIME part dictionary from a provider payload.
Returns:
Extracted plain-text content, or None if unsupported or empty.
Optional[str]:
Extracted plain-text content, or `None` if unsupported or empty.
"""
mime_type = part.get("mimeType")
body = part.get("body", {})
@@ -79,16 +87,19 @@ def extract_body(payload: Dict[str, Any]) -> str:
Extract the best-effort message body from a Gmail payload.
Priority:
1. text/plain
2. text/html (stripped to text)
1. `text/plain`
2. `text/html` (stripped to text)
3. Single-part body
4. empty string (if nothing usable found)
4. Empty string (if nothing usable found)
Args:
payload: Provider-native message payload dictionary.
payload (Dict[str, Any]):
Provider-native message payload dictionary.
Returns:
Extracted plain-text message body.
str:
Extracted plain-text message body.
"""
if not payload:
return ""