Updated docstrings and added README.md

2026-03-08 17:59:53 +05:30
parent 0453fdd88a
commit c541577788
46 changed files with 863 additions and 681 deletions
--- a/mail_intake/parsers/__init__.py
+++ b/mail_intake/parsers/__init__.py
@@ -1,34 +1,34 @@
 """
+# Summary
+
 Message parsing utilities for Mail Intake.

----
-
-## Summary
-
 This package contains **provider-aware but adapter-agnostic parsing helpers**
 used to extract and normalize structured information from raw mail payloads.

 Parsers in this package are responsible for:
-- Interpreting provider-native message structures
-- Extracting meaningful fields such as headers, body text, and subjects
-- Normalizing data into consistent internal representations
+
+- Interpreting provider-native message structures.
+- Extracting meaningful fields such as headers, body text, and subjects.
+- Normalizing data into consistent internal representations.

 This package does not:
-- Perform network or IO operations
-- Contain provider API logic
-- Construct domain models directly
+
+- Perform network or IO operations.
+- Contain provider API logic.
+- Construct domain models directly.

 Parsing functions are designed to be composable and are orchestrated by the
 ingestion layer.

 ---

-## Public API
+# Public API

-    extract_body
-    parse_headers
-    extract_sender
-    normalize_subject
+- `extract_body`
+- `parse_headers`
+- `extract_sender`
+- `normalize_subject`

 ---
 """
--- a/mail_intake/parsers/body.py
+++ b/mail_intake/parsers/body.py
@@ -1,4 +1,6 @@
 """
+# Summary
+
 Message body extraction utilities for Mail Intake.

 This module contains helper functions for extracting a best-effort
@@ -24,13 +26,16 @@ def _decode_base64(data: str) -> str:
    omit padding and use non-standard characters.

    Args:
-        data: URL-safe base64-encoded string.
+        data (str):
+            URL-safe base64-encoded string.

    Returns:
-        Decoded UTF-8 text with replacement for invalid characters.
+        str:
+            Decoded UTF-8 text with replacement for invalid characters.

    Raises:
-        MailIntakeParsingError: If decoding fails.
+        MailIntakeParsingError:
+            If decoding fails.
    """
    try:
        padded = data.replace("-", "+").replace("_", "/")
@@ -45,14 +50,17 @@ def _extract_from_part(part: Dict[str, Any]) -> Optional[str]:
    Extract text content from a single MIME part.

    Supports:
-    - text/plain
-    - text/html (converted to plain text)
+
+    - `text/plain`
+    - `text/html` (converted to plain text)

    Args:
-        part: MIME part dictionary from a provider payload.
+        part (Dict[str, Any]):
+            MIME part dictionary from a provider payload.

    Returns:
-        Extracted plain-text content, or None if unsupported or empty.
+        Optional[str]:
+            Extracted plain-text content, or `None` if unsupported or empty.
    """
    mime_type = part.get("mimeType")
    body = part.get("body", {})
@@ -79,16 +87,19 @@ def extract_body(payload: Dict[str, Any]) -> str:
    Extract the best-effort message body from a Gmail payload.

    Priority:
-    1. text/plain
-    2. text/html (stripped to text)
+
+    1. `text/plain`
+    2. `text/html` (stripped to text)
    3. Single-part body
-    4. empty string (if nothing usable found)
+    4. Empty string (if nothing usable found)

    Args:
-        payload: Provider-native message payload dictionary.
+        payload (Dict[str, Any]):
+            Provider-native message payload dictionary.

    Returns:
-        Extracted plain-text message body.
+        str:
+            Extracted plain-text message body.
    """
    if not payload:
        return ""
--- a/mail_intake/parsers/headers.py
+++ b/mail_intake/parsers/headers.py
@@ -1,10 +1,8 @@
 """
+# Summary
+
 Message header parsing utilities for Mail Intake.

----
-
-## Summary
-
 This module provides helper functions for normalizing and extracting
 useful information from provider-native message headers.

@@ -21,7 +19,7 @@ def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:

    Args:
        raw_headers (List[Dict[str, str]]):
-            List of header dictionaries, each containing ``name`` and ``value`` keys.
+            List of header dictionaries, each containing `name` and `value` keys.

    Returns:
        Dict[str, str]:
@@ -30,23 +28,27 @@ def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:
    Notes:
        **Guarantees:**

-            - Provider payloads (such as Gmail) typically represent headers as a list of name/value mappings
-            - This function normalizes them into a case-insensitive dictionary keyed by lowercase header names
+            - Provider payloads (such as Gmail) typically represent headers as a
+              list of name/value mappings.
+            - This function normalizes them into a case-insensitive dictionary
+              keyed by lowercase header names.

    Example:
        Typical usage:
-        
-            Input:
-                [
-                    {"name": "From", "value": "John Doe <john@example.com>"},
-                    {"name": "Subject", "value": "Re: Interview Update"},
-                ]
-    
-            Output:
-                {
-                    "from": "John Doe <john@example.com>",
-                    "subject": "Re: Interview Update",
-                }
+
+        ```python
+        # Input:
+        raw_headers = [
+            {"name": "From", "value": "John Doe <john@example.com>"},
+            {"name": "Subject", "value": "Re: Interview Update"},
+        ]
+
+        # Output:
+        {
+            "from": "John Doe <john@example.com>",
+            "subject": "Re: Interview Update",
+        }
+        ```
    """
    headers: Dict[str, str] = {}

@@ -68,22 +70,24 @@ def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]:

    Args:
        headers (Dict[str, str]):
-            Normalized header dictionary as returned by :func:`parse_headers`.
+            Normalized header dictionary as returned by `parse_headers()`.

    Returns:
        Tuple[str, Optional[str]]:
-            A tuple ``(email, name)`` where ``email`` is the sender email address and ``name`` is the display name, or ``None`` if unavailable.
+            A tuple `(email, name)` where `email` is the sender email address
+            and `name` is the display name, or `None` if unavailable.

    Notes:
        **Responsibilities:**

-            - This function parses the ``From`` header and attempts to extract sender email address and optional human-readable display name
+            - This function parses the `From` header and attempts to extract the
+              sender email address and an optional human-readable display name.

    Example:
        Typical values:

-            ``"John Doe <john@example.com>"`` -> ``("john@example.com", "John Doe")``
-            ``"john@example.com"`` -> ``("john@example.com", None)``
+        - `"John Doe <john@example.com>"` -> `("john@example.com", "John Doe")`
+        - `"john@example.com"` -> `("john@example.com", None)`
    """
    from_header = headers.get("from")
    if not from_header:
--- a/mail_intake/parsers/subject.py
+++ b/mail_intake/parsers/subject.py
@@ -1,10 +1,8 @@
 """
+# Summary
+
 Subject line normalization utilities for Mail Intake.

----
-
-## Summary
-
 This module provides helper functions for normalizing email subject lines
 to enable reliable thread-level comparison and grouping.

@@ -36,14 +34,15 @@ def normalize_subject(subject: str) -> str:
    Notes:
        **Responsibilities:**

-            - Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``
-            - Repeats prefix stripping to handle stacked prefixes
-            - Collapses excessive whitespace
-            - Preserves original casing (no lowercasing)
+            - Strips common prefixes such as `Re:`, `Fwd:`, and `FW:`.
+            - Repeats prefix stripping to handle stacked prefixes.
+            - Collapses excessive whitespace.
+            - Preserves original casing (no lowercasing).

        **Guarantees:**

-            - This function is intentionally conservative and avoids aggressive transformations that could alter the semantic meaning of the subject
+            - This function is intentionally conservative and avoids aggressive
+              transformations that could alter the semantic meaning of the subject.
    """
    if not subject:
        return ""