This commit is contained in:
2026-01-03 05:21:55 +05:30
parent 278f0a3d40
commit 412a9c7bec
22 changed files with 950 additions and 0 deletions

128
tests/unit/test_parsers.py Normal file
View File

@@ -0,0 +1,128 @@
import base64
from mail_intake.parsers.subject import normalize_subject
from mail_intake.parsers.headers import parse_headers, extract_sender
from mail_intake.parsers.body import extract_body
def _b64(text: str) -> str:
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
# --------------------
# Subject parsing
# --------------------
def test_normalize_subject_strips_common_prefixes():
    """Reply/forward prefixes are removed, including stacked, mixed-case ones."""
    assert normalize_subject("Re: Interview Update") == "Interview Update"
    assert normalize_subject("Fwd: Re: Offer Letter") == "Offer Letter"
    assert normalize_subject("FW: Re: FW: Status") == "Status"
def test_normalize_subject_preserves_content_and_case():
    """A subject without any prefix comes back unchanged, casing included."""
    subject = "Interview Update Backend Role"
    assert normalize_subject(subject) == subject
def test_normalize_subject_empty_and_none_safe():
    """An empty subject normalizes to an empty string.

    NOTE(review): the test name also promises ``None`` safety, but only the
    empty string is exercised here. Add a ``None`` case once the expected
    result for ``normalize_subject(None)`` is confirmed.
    """
    assert normalize_subject("") == ""
# --------------------
# Header parsing
# --------------------
def test_parse_headers_lowercases_keys():
    """Header names become lowercase keys; header values are kept verbatim."""
    raw_headers = [
        {"name": "From", "value": "Alice <alice@example.com>"},
        {"name": "Subject", "value": "Hello"},
    ]

    headers = parse_headers(raw_headers)

    assert headers["from"] == "Alice <alice@example.com>"
    assert headers["subject"] == "Hello"
def test_parse_headers_ignores_invalid_entries():
    """Entries with a ``None`` name or value do not break parsing.

    The valid ``From`` header must survive and the ``None``-valued ``X-Test``
    header must be absent. The ``None``-name entry has no assertion of its
    own; it is only checked implicitly, in that parsing must not raise.
    """
    raw_headers = [
        {"name": "From", "value": "Bob <bob@example.com>"},
        {"name": None, "value": "X"},
        {"name": "X-Test", "value": None},
    ]

    headers = parse_headers(raw_headers)

    assert "from" in headers
    assert "x-test" not in headers
def test_extract_sender_with_name_and_email():
    """A ``Name <address>`` From header yields ``(address, name)``."""
    headers = {"from": "Alice Smith <alice@example.com>"}

    email, name = extract_sender(headers)

    assert email == "alice@example.com"
    assert name == "Alice Smith"
def test_extract_sender_email_only():
    """A bare address in From yields that address and ``None`` for the name."""
    headers = {"from": "bob@example.com"}

    email, name = extract_sender(headers)

    assert email == "bob@example.com"
    assert name is None
def test_extract_sender_missing_from():
    """With no From header, the email is ``""`` and the name is ``None``."""
    email, name = extract_sender({})

    assert email == ""
    assert name is None
# --------------------
# Body parsing
# --------------------
def test_extract_body_prefers_text_plain():
    """When both HTML and plain-text parts exist, the plain text is returned.

    The HTML part is listed first so that a "first part wins" implementation
    would fail this test.
    """
    payload = {
        "parts": [
            {
                "mimeType": "text/html",
                "body": {"data": _b64("<p>Hello <b>HTML</b></p>")},
            },
            {
                "mimeType": "text/plain",
                "body": {"data": _b64("Hello TEXT")},
            },
        ]
    }

    body = extract_body(payload)

    assert body == "Hello TEXT"
def test_extract_body_falls_back_to_html():
    """With only an HTML part, the body still carries the HTML's text.

    Only substring presence is asserted, so the exact shape of the extracted
    text (whitespace, leftover markup) is deliberately left unconstrained.
    """
    payload = {
        "parts": [
            {
                "mimeType": "text/html",
                "body": {"data": _b64("<p>Hello <b>World</b></p>")},
            }
        ]
    }

    body = extract_body(payload)

    assert "Hello" in body
    assert "World" in body
def test_extract_body_single_part():
    """A payload with a top-level ``body`` and no ``parts`` is decoded directly."""
    payload = {
        "body": {"data": _b64("Single part body")}
    }

    body = extract_body(payload)

    assert body == "Single part body"
def test_extract_body_empty_payload():
    """An empty payload dict yields an empty string."""
    assert extract_body({}) == ""