"""Unit tests for the subject, header, sender and body parsers."""

import base64

from mail_intake.parsers import normalize_subject
from mail_intake.parsers import parse_headers, extract_sender
from mail_intake.parsers import extract_body


def _b64(text: str) -> str:
    """Return *text* UTF-8 encoded and then base64 encoded, as a ``str``."""
    return base64.b64encode(text.encode("utf-8")).decode("utf-8")


# --------------------
# Subject parsing
# --------------------

def test_normalize_subject_strips_common_prefixes() -> None:
    """Reply/forward prefixes are removed, including stacked ones."""
    assert normalize_subject("Re: Interview Update") == "Interview Update"
    assert normalize_subject("Fwd: Re: Offer Letter") == "Offer Letter"
    assert normalize_subject("FW: Re: FW: Status") == "Status"


def test_normalize_subject_preserves_content_and_case() -> None:
    """A subject without prefixes is returned unchanged."""
    subject = "Interview Update – Backend Role"
    assert normalize_subject(subject) == subject


def test_normalize_subject_empty_and_none_safe() -> None:
    """An empty subject normalizes to an empty string."""
    # NOTE(review): the test name mentions None, but only "" is exercised
    # here — confirm the intended None contract before asserting on it.
    assert normalize_subject("") == ""


# --------------------
# Header parsing
# --------------------

def test_parse_headers_lowercases_keys() -> None:
    """Header names are exposed as lowercase keys; values are kept as given."""
    raw_headers = [
        {"name": "From", "value": "Alice <alice@example.com>"},
        {"name": "Subject", "value": "Hello"},
    ]

    headers = parse_headers(raw_headers)

    assert headers["from"] == "Alice <alice@example.com>"
    assert headers["subject"] == "Hello"


def test_parse_headers_ignores_invalid_entries() -> None:
    """Entries whose value is None do not appear in the result."""
    raw_headers = [
        {"name": "From", "value": "Bob <bob@example.com>"},
        {"name": None, "value": "X"},
        {"name": "X-Test", "value": None},
    ]

    headers = parse_headers(raw_headers)

    assert "from" in headers
    assert "x-test" not in headers


def test_extract_sender_with_name_and_email() -> None:
    """A ``Name <address>`` From header yields both address and display name."""
    # The address must be present in the fixture for the email assertion
    # below to be satisfiable.
    headers = {"from": "Alice Smith <alice@example.com>"}

    email, name = extract_sender(headers)

    assert email == "alice@example.com"
    assert name == "Alice Smith"


def test_extract_sender_email_only() -> None:
    """A bare address yields the address and no display name."""
    headers = {"from": "bob@example.com"}

    email, name = extract_sender(headers)

    assert email == "bob@example.com"
    assert name is None


def test_extract_sender_missing_from() -> None:
    """With no From header the email is empty and the name is None."""
    email, name = extract_sender({})

    assert email == ""
    assert name is None


# --------------------
# Body parsing
# --------------------

def test_extract_body_prefers_text_plain() -> None:
    """When both HTML and plain-text parts exist, the plain text is returned."""
    payload = {
        "parts": [
            {
                "mimeType": "text/html",
                "body": {"data": _b64("<html><body>Hello HTML</body></html>")},
            },
            {
                "mimeType": "text/plain",
                "body": {"data": _b64("Hello TEXT")},
            },
        ]
    }

    body = extract_body(payload)

    assert body == "Hello TEXT"


def test_extract_body_falls_back_to_html() -> None:
    """With only an HTML part, the returned body still contains its text."""
    payload = {
        "parts": [
            {
                "mimeType": "text/html",
                "body": {
                    "data": _b64("<html><body><p>Hello World</p></body></html>"),
                },
            }
        ]
    }

    body = extract_body(payload)

    # Only word presence is checked, so the test does not depend on how the
    # surrounding markup or whitespace is rendered.
    assert "Hello" in body
    assert "World" in body


def test_extract_body_single_part() -> None:
    """A payload with a top-level body and no parts is decoded directly."""
    payload = {
        "body": {"data": _b64("Single part body")}
    }

    body = extract_body(payload)

    assert body == "Single part body"


def test_extract_body_empty_payload() -> None:
    """An empty payload produces an empty body string."""
    assert extract_body({}) == ""