feat(testing): add end-to-end HTML scraping and parsing tests with typed parsers

- Add smart httpx MockTransport routing based on endpoint paths
- Render HTML fixtures via Jinja templates populated from JSON data
- Introduce explicit, typed HTML parsers for semantic and table-based content
- Add end-to-end tests covering scraper → content → parser → Pydantic models
- Enforce explicit output contracts and avoid default dict-based parsing
This commit is contained in:
2026-01-02 18:31:34 +05:30
parent fa14a79ec9
commit 07293e4651
8 changed files with 156 additions and 31 deletions

View File

@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<title>{{ title }}</title>
<meta name="description" content="{{ description }}">
</head>
<body>
<div id="content">{{ content }}</div>
<a href="{{ link_url }}">{{ link_text }}</a>
</body>
</html>

7
tests/mocks/simple.json Normal file
View File

@@ -0,0 +1,7 @@
{
"title": "Test Page",
"description": "Simple test page",
"content": "Hello World",
"link_url": "https://example.com",
"link_text": "Link"
}

View File

@@ -0,0 +1,31 @@
<!DOCTYPE html>
<html>
<head>
<title>{{ title }}</title>
<meta name="description" content="{{ description }}">
</head>
<body>
<h1>{{ heading }}</h1>
<table id="{{ table_id }}">
<thead>
<tr>
{% for col in columns %}
<th>{{ col }}</th>
{% endfor %}
</tr>
</thead>
<tbody>
{% for row in rows %}
<tr>
{% for cell in row %}
<td>{{ cell }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
<a href="{{ link_url }}">{{ link_text }}</a>
</body>
</html>

14
tests/mocks/table.json Normal file
View File

@@ -0,0 +1,14 @@
{
"title": "Table Test Page",
"description": "HTML page with a table for parsing tests",
"heading": "Sample Table",
"table_id": "data-table",
"columns": ["Name", "Age", "City"],
"rows": [
["Alice", "30", "London"],
["Bob", "25", "New York"],
["Charlie", "35", "Berlin"]
],
"link_url": "https://example.org/details",
"link_text": "Details"
}