Build: 0.1.8
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-03-08 01:01:39 +05:30
parent 8701bf92ac
commit 9191de9dff
200 changed files with 130793 additions and 18277 deletions

View File

@@ -77,7 +77,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="/." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -142,12 +144,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -173,94 +182,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="/." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="/core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="/html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="/pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -274,10 +195,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="/." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -320,13 +239,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -335,8 +268,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="/core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -345,8 +279,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -356,27 +296,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="/core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -462,13 +381,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -477,8 +408,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="/html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -487,8 +419,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -498,27 +436,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="/html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -581,13 +498,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -596,8 +527,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="/pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -606,8 +538,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -617,27 +555,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="/pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -749,6 +666,8 @@
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -786,7 +705,7 @@
<script id="__config" type="application/json">{"base": "/", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "/assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "/", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "/assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="/assets/javascripts/bundle.f55a23d4.min.js"></script>

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="../../pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -332,6 +249,18 @@
@@ -339,7 +268,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -349,8 +277,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -359,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
@@ -370,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -459,12 +373,88 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content" class="md-nav__link">
<span class="md-ellipsis">
Content
</span>
</a>
<nav class="md-nav" aria-label="Content">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
<span class="md-ellipsis">
content_type
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
<span class="md-ellipsis">
metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
<span class="md-ellipsis">
raw
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.source" class="md-nav__link">
<span class="md-ellipsis">
source
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
@@ -478,6 +468,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
<span class="md-ellipsis">
HTML
@@ -521,6 +521,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -593,13 +603,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -608,8 +630,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -618,8 +641,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -629,27 +658,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -712,13 +720,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -727,8 +749,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -737,8 +760,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -748,27 +777,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -883,12 +891,88 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content" class="md-nav__link">
<span class="md-ellipsis">
Content
</span>
</a>
<nav class="md-nav" aria-label="Content">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
<span class="md-ellipsis">
content_type
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
<span class="md-ellipsis">
metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
<span class="md-ellipsis">
raw
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.content.Content.source" class="md-nav__link">
<span class="md-ellipsis">
source
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
@@ -902,6 +986,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
<span class="md-ellipsis">
HTML
@@ -945,6 +1039,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -971,7 +1075,7 @@
<h2 id="omniread.core.content" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.core.content</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.content</span>
</h2>
@@ -979,6 +1083,8 @@
<div class="doc doc-contents first">
<p>Canonical content models for OmniRead.</p>
<hr />
<h4 id="omniread.core.content--summary">Summary</h4>
<p>This module defines the <strong>format-agnostic content representation</strong> used across
all parsers and scrapers in OmniRead.</p>
<p>The models defined here represent <em>what</em> was extracted, not <em>how</em> it was
@@ -994,94 +1100,38 @@ the semantic meaning of these models.</p>
<h3 id="omniread.core.content-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.core.content.Content" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">Content</span>
<h4 id="omniread.core.content.Content" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">Content</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-dataclass"><code>dataclass</code></small>
</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Normalized representation of extracted content.</p>
<p>A <code>Content</code> instance represents a raw content payload along with minimal
contextual metadata describing its origin and type.</p>
<p>This class is the <strong>primary exchange format</strong> between:
- Scrapers
- Parsers
- Downstream consumers</p>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code><span title="omniread.core.content.Content.raw">raw</span></code></td>
<td>
<code>bytes</code>
</td>
<td>
<div class="doc-md-description">
<p>Raw content bytes as retrieved from the source.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="omniread.core.content.Content.source">source</span></code></td>
<td>
<code>str</code>
</td>
<td>
<div class="doc-md-description">
<p>Identifier of the content origin (URL, file path, or logical name).</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="omniread.core.content.Content.content_type">content_type</span></code></td>
<td>
<code><span title="typing.Optional">Optional</span>[<a class="autorefs autorefs-internal" title="omniread.core.content.ContentType" href="#omniread.core.content.ContentType">ContentType</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>Optional MIME type of the content, if known.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="omniread.core.content.Content.metadata">metadata</span></code></td>
<td>
<code><span title="typing.Optional">Optional</span>[<span title="typing.Mapping">Mapping</span>[str, <span title="typing.Any">Any</span>]]</code>
</td>
<td>
<div class="doc-md-description">
<p>Optional, implementation-defined metadata associated with
the content (e.g., headers, encoding hints, extraction notes).</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type
- This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers
</code></pre></div></td></tr></table></div>
</details>
@@ -1091,7 +1141,101 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
<h5 id="omniread.core.content.Content-attributes">Attributes</h5>
<div class="doc doc-object doc-attribute">
<h6 id="omniread.core.content.Content.content_type" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">content_type</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Optional MIME type of the content, if known.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h6 id="omniread.core.content.Content.metadata" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">metadata</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes).</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h6 id="omniread.core.content.Content.raw" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">raw</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Raw content bytes as retrieved from the source.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h6 id="omniread.core.content.Content.source" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">source</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">source</span><span class="p">:</span> <span class="nb">str</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Identifier of the content origin (URL, file path, or logical name).</p>
</div>
</div>
@@ -1107,11 +1251,11 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
<h3 id="omniread.core.content.ContentType" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">ContentType</span>
<h4 id="omniread.core.content.ContentType" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">ContentType</span>
</h3>
</h4>
<div class="doc doc-contents ">
@@ -1120,11 +1264,17 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
<p>Supported MIME types for extracted content.</p>
<p>This enum represents the declared or inferred media type of the content
source. It is primarily used for routing content to the appropriate
parser or downstream consumer.</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Guarantees:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This enum represents the declared or inferred media type of the content source
- It is primarily used for routing content to the appropriate parser or downstream consumer
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
@@ -1133,14 +1283,14 @@ parser or downstream consumer.</p>
<h5 id="omniread.core.content.ContentType-attributes">Attributes</h5>
<div class="doc doc-object doc-attribute">
<h4 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">HTML</span>
<h6 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">HTML</span>
<span class="doc doc-labels">
@@ -1148,9 +1298,9 @@ parser or downstream consumer.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">HTML</span> <span class="o">=</span> <span class="s1">&#39;text/html&#39;</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">HTML</span> <span class="o">=</span> <span class="s1">&#39;text/html&#39;</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1163,8 +1313,8 @@ parser or downstream consumer.</p>
<h4 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">JSON</span>
<h6 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">JSON</span>
<span class="doc doc-labels">
@@ -1172,9 +1322,9 @@ parser or downstream consumer.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">JSON</span> <span class="o">=</span> <span class="s1">&#39;application/json&#39;</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">JSON</span> <span class="o">=</span> <span class="s1">&#39;application/json&#39;</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1187,8 +1337,8 @@ parser or downstream consumer.</p>
<h4 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">PDF</span>
<h6 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">PDF</span>
<span class="doc doc-labels">
@@ -1196,9 +1346,9 @@ parser or downstream consumer.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">PDF</span> <span class="o">=</span> <span class="s1">&#39;application/pdf&#39;</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">PDF</span> <span class="o">=</span> <span class="s1">&#39;application/pdf&#39;</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1211,8 +1361,8 @@ parser or downstream consumer.</p>
<h4 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">XML</span>
<h6 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">XML</span>
<span class="doc doc-labels">
@@ -1220,9 +1370,9 @@ parser or downstream consumer.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">XML</span> <span class="o">=</span> <span class="s1">&#39;application/xml&#39;</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">XML</span> <span class="o">=</span> <span class="s1">&#39;application/xml&#39;</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1266,6 +1416,8 @@ parser or downstream consumer.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1303,7 +1455,7 @@ parser or downstream consumer.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

File diff suppressed because it is too large Load Diff

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="../../pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -332,6 +249,18 @@
@@ -339,7 +268,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -349,8 +277,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -359,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
@@ -370,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -482,6 +396,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser" class="md-nav__link">
<span class="md-ellipsis">
BaseParser
@@ -492,6 +425,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -500,6 +443,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -526,6 +484,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -575,13 +543,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -590,8 +570,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -600,8 +581,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -611,27 +598,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -694,13 +660,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -709,8 +689,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -719,8 +700,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -730,27 +717,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -865,6 +831,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser" class="md-nav__link">
<span class="md-ellipsis">
BaseParser
@@ -875,6 +860,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -883,6 +878,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.parser.BaseParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -909,6 +919,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -935,7 +955,7 @@
<h2 id="omniread.core.parser" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.core.parser</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.parser</span>
</h2>
@@ -943,6 +963,8 @@
<div class="doc doc-contents first">
<p>Abstract parsing contracts for OmniRead.</p>
<hr />
<h4 id="omniread.core.parser--summary">Summary</h4>
<p>This module defines the <strong>format-agnostic parser interface</strong> used to transform
raw content into structured, typed representations.</p>
<p>Parsers are responsible for:
@@ -963,19 +985,19 @@ raw content into structured, typed representations.</p>
<h3 id="omniread.core.parser-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.core.parser.BaseParser" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">BaseParser</span>
<h4 id="omniread.core.parser.BaseParser" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BaseParser</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">BaseParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">BaseParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
@@ -983,16 +1005,23 @@ raw content into structured, typed representations.</p>
<p>Base interface for all parsers.</p>
<p>A parser is a self-contained object that owns the Content
it is responsible for interpreting.</p>
<p>Implementations must:
- Declare supported content types via <code>supported_types</code>
- Raise parsing-specific exceptions from <code>parse()</code>
- Remain deterministic for a given input</p>
<p>Consumers may rely on:
- Early validation of content compatibility
- Type-stable return values from <code>parse()</code></p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Guarantees:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- A parser is a self-contained object that owns the Content it is responsible for interpreting
- Consumers may rely on early validation of content compatibility and type-stable return values from `parse()`
</code></pre></div></td></tr></table></div>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span>
<span class="normal">3</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must declare supported content types via `supported_types`
- Implementations must raise parsing-specific exceptions from `parse()`
- Implementations must remain deterministic for a given input
</code></pre></div></td></tr></table></div>
</details>
<p>Initialize the parser with content to be parsed.</p>
@@ -1010,7 +1039,7 @@ it is responsible for interpreting.</p>
<tr class="doc-section-item">
<td><code>content</code></td>
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
@@ -1056,14 +1085,14 @@ it is responsible for interpreting.</p>
<h5 id="omniread.core.parser.BaseParser-attributes">Attributes</h5>
<div class="doc doc-object doc-attribute">
<h4 id="omniread.core.parser.BaseParser.supported_types" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
<h6 id="omniread.core.parser.BaseParser.supported_types" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
<span class="doc doc-labels">
@@ -1071,53 +1100,50 @@ it is responsible for interpreting.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Set of content types supported by this parser.</p>
<p>An empty set indicates that the parser is content-type agnostic.</p>
<p>Set of content types supported by this parser. An empty set indicates that the parser is content-type agnostic.</p>
</div>
</div>
<h5 id="omniread.core.parser.BaseParser-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.core.parser.BaseParser.parse" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse</span>
<h6 id="omniread.core.parser.BaseParser.parse" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Parse the owned content into structured output.</p>
<p>Implementations must fully consume the provided content and
return a deterministic, structured output.</p>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>T</code></td> <td>
<code><span title="omniread.core.parser.T">T</span></code>
</td>
<td>
@@ -1152,6 +1178,13 @@ return a deterministic, structured output.</p>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully consume the provided content and return a deterministic, structured output
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
@@ -1159,13 +1192,13 @@ return a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.core.parser.BaseParser.supports" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">supports</span>
<h6 id="omniread.core.parser.BaseParser.supports" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1176,13 +1209,13 @@ return a deterministic, structured output.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>bool</code></td> <td>
<code>bool</code>
</td>
<td>
@@ -1231,6 +1264,8 @@ return a deterministic, structured output.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1268,7 +1303,7 @@ return a deterministic, structured output.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="../../pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -332,6 +249,18 @@
@@ -339,7 +268,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -349,8 +277,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -359,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
@@ -370,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -505,6 +419,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper" class="md-nav__link">
<span class="md-ellipsis">
BaseScraper
@@ -515,6 +448,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -531,6 +474,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -557,13 +510,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -572,8 +537,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -582,8 +548,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -593,27 +565,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -676,13 +627,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -691,8 +656,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -701,8 +667,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -712,27 +684,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -847,6 +798,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.core.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper" class="md-nav__link">
<span class="md-ellipsis">
BaseScraper
@@ -857,6 +827,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.core.scraper.BaseScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -873,6 +853,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -899,7 +889,7 @@
<h2 id="omniread.core.scraper" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.core.scraper</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.scraper</span>
</h2>
@@ -907,6 +897,8 @@
<div class="doc doc-contents first">
<p>Abstract scraping contracts for OmniRead.</p>
<hr />
<h4 id="omniread.core.scraper--summary">Summary</h4>
<p>This module defines the <strong>format-agnostic scraper interface</strong> responsible for
acquiring raw content from external sources.</p>
<p>Scrapers are responsible for:
@@ -928,17 +920,17 @@ acquiring raw content from external sources.</p>
<h3 id="omniread.core.scraper-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.core.scraper.BaseScraper" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">BaseScraper</span>
<h4 id="omniread.core.scraper.BaseScraper" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BaseScraper</span>
</h3>
</h4>
<div class="doc doc-contents ">
@@ -947,21 +939,24 @@ acquiring raw content from external sources.</p>
<p>Base interface for all scrapers.</p>
<p>A scraper is responsible ONLY for fetching raw content
(bytes) from a source. It must not interpret or parse it.</p>
<p>A scraper is a <strong>stateless acquisition component</strong> that retrieves raw
content from a source and returns it as a <code>Content</code> object.</p>
<p>Scrapers define <em>how content is obtained</em>, not <em>what the content means</em>.</p>
<p>Implementations may vary in:
- Transport mechanism (HTTP, filesystem, cloud storage)
- Authentication strategy
- Retry and backoff behavior</p>
<p>Implementations must not:
- Parse content
- Modify content semantics
- Couple scraping logic to a specific parser</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span>
<span class="normal">3</span>
<span class="normal">4</span></pre></div></td><td class="code"><div><pre><span></span><code>- A scraper is responsible ONLY for fetching raw content (bytes) from a source. It must not interpret or parse it
- A scraper is a stateless acquisition component that retrieves raw content from a source and returns it as a `Content` object
- Scrapers define how content is obtained, not what the content means
- Implementations may vary in transport mechanism, authentication strategy, retry and backoff behavior
</code></pre></div></td></tr></table></div>
<p><strong>Constraints:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must not parse content, modify content semantics, or couple scraping logic to a specific parser
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
@@ -972,28 +967,26 @@ content from a source and returns it as a <code>Content</code> object.</p>
<h5 id="omniread.core.scraper.BaseScraper-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.core.scraper.BaseScraper.fetch" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">fetch</span>
<h6 id="omniread.core.scraper.BaseScraper.fetch" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Fetch raw content from the given source.</p>
<p>Implementations must retrieve the content referenced by <code>source</code>
and return it as raw bytes wrapped in a <code>Content</code> object.</p>
<p><span class="doc-section-title">Parameters:</span></p>
@@ -1043,14 +1036,14 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
<td><code>Content</code></td> <td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
@@ -1058,42 +1051,6 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Raw content bytes</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Source identifier</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Optional metadata</li>
</ul>
</div>
</td>
</tr>
</tbody>
</table>
@@ -1120,6 +1077,13 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must retrieve the content referenced by `source` and return it as raw bytes wrapped in a `Content` object
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
@@ -1157,6 +1121,8 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1194,7 +1160,7 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

File diff suppressed because it is too large Load Diff

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="../../pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -331,13 +248,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -346,8 +277,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -356,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -367,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -474,6 +391,16 @@
@@ -481,7 +408,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -491,8 +417,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -501,8 +428,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_3">
@@ -512,27 +445,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -601,6 +513,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.html.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser" class="md-nav__link">
<span class="md-ellipsis">
HTMLParser
@@ -611,6 +542,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -619,6 +560,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -681,6 +637,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -730,13 +696,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -745,8 +725,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -755,8 +736,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -766,27 +753,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -901,6 +867,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.html.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser" class="md-nav__link">
<span class="md-ellipsis">
HTMLParser
@@ -911,6 +896,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -919,6 +914,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.parser.HTMLParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -981,6 +991,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -1007,7 +1027,7 @@
<h2 id="omniread.html.parser" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.html.parser</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.html.parser</span>
</h2>
@@ -1015,6 +1035,8 @@
<div class="doc doc-contents first">
<p>HTML parser base implementations for OmniRead.</p>
<hr />
<h4 id="omniread.html.parser--summary">Summary</h4>
<p>This module provides reusable HTML parsing utilities built on top of
the abstract parser contracts defined in <code>omniread.core.parser</code>.</p>
<p>It supplies:
@@ -1033,38 +1055,42 @@ to return a structured representation appropriate for their use case.</p>
<h3 id="omniread.html.parser-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.html.parser.HTMLParser" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">HTMLParser</span>
<h4 id="omniread.html.parser.HTMLParser" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HTMLParser</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">HTMLParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;html.parser&#39;</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">HTMLParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;html.parser&#39;</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.html.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.html.parser.T">T</span>]</code></p>
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../omniread/core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.html.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.html.parser.T">T</span>]</code></p>
<p>Base HTML parser.</p>
<p>This class extends the core <code>BaseParser</code> with HTML-specific behavior,
including DOM parsing via BeautifulSoup and reusable extraction helpers.</p>
<p>Provides reusable helpers for HTML extraction.
Concrete parsers must explicitly define the return type.</p>
<p>Characteristics:
- Accepts only HTML content
- Owns a parsed BeautifulSoup DOM tree
- Provides pure helper utilities for common HTML structures</p>
<p>Concrete subclasses must:
- Define the output type <code>T</code>
- Implement the <code>parse()</code> method</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This class extends the core `BaseParser` with HTML-specific behavior, including DOM parsing via BeautifulSoup and reusable extraction helpers
- Provides reusable helpers for HTML extraction. Concrete parsers must explicitly define the return type
</code></pre></div></td></tr></table></div>
<p><strong>Guarantees:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Characteristics: Accepts only HTML content, owns a parsed BeautifulSoup DOM tree, provides pure helper utilities for common HTML structures
</code></pre></div></td></tr></table></div>
<p><strong>Constraints:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Concrete subclasses must define the output type `T` and implement the `parse()` method
</code></pre></div></td></tr></table></div>
</details>
<p>Initialize the HTML parser.</p>
@@ -1082,7 +1108,7 @@ Concrete parsers must explicitly define the return type.</p>
<tr class="doc-section-item">
<td><code>content</code></td>
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
@@ -1100,8 +1126,7 @@ Concrete parsers must explicitly define the return type.</p>
</td>
<td>
<div class="doc-md-description">
<p>BeautifulSoup parser backend to use
(e.g., 'html.parser', 'lxml').</p>
<p>BeautifulSoup parser backend to use (e.g., 'html.parser', 'lxml').</p>
</div>
</td>
<td>
@@ -1143,14 +1168,14 @@ Concrete parsers must explicitly define the return type.</p>
<h5 id="omniread.html.parser.HTMLParser-attributes">Attributes</h5>
<div class="doc doc-object doc-attribute">
<h4 id="omniread.html.parser.HTMLParser.supported_types" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
<h6 id="omniread.html.parser.HTMLParser.supported_types" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
<span class="doc doc-labels">
@@ -1158,9 +1183,9 @@ Concrete parsers must explicitly define the return type.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">HTML</span><span class="p">}</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">HTML</span><span class="p">}</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1169,41 +1194,39 @@ Concrete parsers must explicitly define the return type.</p>
</div>
<h5 id="omniread.html.parser.HTMLParser-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.parse" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse</span>
<h6 id="omniread.html.parser.HTMLParser.parse" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Fully parse the HTML content into structured output.</p>
<p>Implementations must fully interpret the HTML DOM and return
a deterministic, structured output.</p>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>T</code></td> <td>
<code><span title="omniread.html.parser.T">T</span></code>
</td>
<td>
@@ -1215,6 +1238,13 @@ a deterministic, structured output.</p>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully interpret the HTML DOM and return a deterministic, structured output
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
@@ -1222,17 +1252,17 @@ a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.parse_div" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse_div</span>
<h6 id="omniread.html.parser.HTMLParser.parse_div" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_div</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_div</span><span class="p">(</span><span class="n">div</span><span class="p">:</span> <span class="n">Tag</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">separator</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39; &#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_div</span><span class="p">(</span><span class="n">div</span><span class="p">:</span> <span class="n">Tag</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">separator</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39; &#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1286,13 +1316,13 @@ a deterministic, structured output.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>str</code></td> <td>
<code>str</code>
</td>
<td>
@@ -1311,17 +1341,17 @@ a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.parse_link" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse_link</span>
<h6 id="omniread.html.parser.HTMLParser.parse_link" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_link</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_link</span><span class="p">(</span><span class="n">a</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_link</span><span class="p">(</span><span class="n">a</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1372,7 +1402,8 @@ a deterministic, structured output.</p>
</td>
<td>
<div class="doc-md-description">
<p>The value of the <code>href</code> attribute, or None if absent.</p>
<p>Optional[str]:
The value of the <code>href</code> attribute, or None if absent.</p>
</div>
</td>
</tr>
@@ -1386,20 +1417,17 @@ a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.parse_meta" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse_meta</span>
<h6 id="omniread.html.parser.HTMLParser.parse_meta" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_meta</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_meta</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_meta</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Extract high-level metadata from the HTML document.</p>
<p>This includes:
- Document title
- <code>&lt;meta&gt;</code> tag name/property → content mappings</p>
<p><span class="doc-section-title">Returns:</span></p>
@@ -1417,13 +1445,23 @@ a deterministic, structured output.</p>
</td>
<td>
<div class="doc-md-description">
<p>Dictionary containing extracted metadata.</p>
<p>dict[str, Any]:
Dictionary containing extracted metadata.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- Extract high-level metadata from the HTML document
- This includes: Document title, `&lt;meta&gt;` tag name/property → content mappings
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
@@ -1431,17 +1469,17 @@ a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.parse_table" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse_table</span>
<h6 id="omniread.html.parser.HTMLParser.parse_table" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_table</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_table</span><span class="p">(</span><span class="n">table</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_table</span><span class="p">(</span><span class="n">table</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1492,7 +1530,8 @@ a deterministic, structured output.</p>
</td>
<td>
<div class="doc-md-description">
<p>A list of rows, where each row is a list of cell text values.</p>
<p>list[list[str]]:
A list of rows, where each row is a list of cell text values.</p>
</div>
</td>
</tr>
@@ -1506,13 +1545,13 @@ a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.parser.HTMLParser.supports" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">supports</span>
<h6 id="omniread.html.parser.HTMLParser.supports" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1523,13 +1562,13 @@ a deterministic, structured output.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>bool</code></td> <td>
<code>bool</code>
</td>
<td>
@@ -1578,6 +1617,8 @@ a deterministic, structured output.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1615,7 +1656,7 @@ a deterministic, structured output.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item">
<a href="../../pdf/" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -331,13 +248,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -346,8 +277,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -356,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -367,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -474,6 +391,16 @@
@@ -481,7 +408,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -491,8 +417,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -501,8 +428,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_3">
@@ -512,27 +445,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -624,6 +536,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.html.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper" class="md-nav__link">
<span class="md-ellipsis">
HTMLScraper
@@ -634,6 +565,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -659,6 +600,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -685,13 +636,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -700,8 +665,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../pdf/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -710,8 +676,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
@@ -721,27 +693,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../pdf/" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -856,6 +807,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.html.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper" class="md-nav__link">
<span class="md-ellipsis">
HTMLScraper
@@ -866,6 +836,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.html.scraper.HTMLScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -891,6 +871,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -917,7 +907,7 @@
<h2 id="omniread.html.scraper" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.html.scraper</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.html.scraper</span>
</h2>
@@ -925,6 +915,8 @@
<div class="doc doc-contents first">
<p>HTML scraping implementation for OmniRead.</p>
<hr />
<h4 id="omniread.html.scraper--summary">Summary</h4>
<p>This module provides an HTTP-based scraper for retrieving HTML documents.
It implements the core <code>BaseScraper</code> contract using <code>httpx</code> as the transport
layer.</p>
@@ -946,38 +938,39 @@ layer.</p>
<h3 id="omniread.html.scraper-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.html.scraper.HTMLScraper" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">HTMLScraper</span>
<h4 id="omniread.html.scraper.HTMLScraper" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HTMLScraper</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">HTMLScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">httpx</span><span class="o">.</span><span class="n">Client</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">15.0</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">follow_redirects</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">HTMLScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">httpx</span><span class="o">.</span><span class="n">Client</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">15.0</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">follow_redirects</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../omniread/core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
<p>Base HTML scraper using httpx.</p>
<p>This scraper retrieves HTML documents over HTTP(S) and returns them
as raw content wrapped in a <code>Content</code> object.</p>
<p>Fetches raw bytes and metadata only.
The scraper:
- Uses <code>httpx.Client</code> for HTTP requests
- Enforces an HTML content type
- Preserves HTTP response metadata</p>
<p>The scraper does not:
- Parse HTML
- Perform retries or backoff
- Handle non-HTML responses</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This scraper retrieves HTML documents over HTTP(S) and returns them as raw content wrapped in a `Content` object
- Fetches raw bytes and metadata only. The scraper uses `httpx.Client` for HTTP requests, enforces an HTML content type, preserves HTTP response metadata
</code></pre></div></td></tr></table></div>
<p><strong>Constraints:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- The scraper does not: Parse HTML, perform retries or backoff, handle non-HTML responses
</code></pre></div></td></tr></table></div>
</details>
<p>Initialize the HTML scraper.</p>
@@ -995,12 +988,11 @@ The scraper:
<tr class="doc-section-item">
<td><code>client</code></td>
<td>
<code><span title="typing.Optional">Optional</span>[<span title="httpx.Client">Client</span>]</code>
<code><span title="httpx.Client">Client</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Optional pre-configured <code>httpx.Client</code>. If omitted,
a client is created internally.</p>
<p>Optional pre-configured <code>httpx.Client</code>. If omitted, a client is created internally.</p>
</div>
</td>
<td>
@@ -1063,18 +1055,18 @@ a client is created internally.</p>
<h5 id="omniread.html.scraper.HTMLScraper-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.scraper.HTMLScraper.fetch" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">fetch</span>
<h6 id="omniread.html.scraper.HTMLScraper.fetch" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1128,66 +1120,18 @@ a client is created internally.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
<td><code>Content</code></td> <td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A <code>Content</code> instance containing:</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Raw HTML bytes</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Source URL</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>HTML content type</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>HTTP response metadata</li>
</ul>
<p>A <code>Content</code> instance containing raw HTML bytes, source URL, HTML content type, and HTTP response metadata.</p>
</div>
</td>
</tr>
@@ -1234,13 +1178,13 @@ a client is created internally.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.html.scraper.HTMLScraper.validate_content_type" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">validate_content_type</span>
<h6 id="omniread.html.scraper.HTMLScraper.validate_content_type" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">validate_content_type</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">validate_content_type</span><span class="p">(</span><span class="n">response</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">validate_content_type</span><span class="p">(</span><span class="n">response</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1291,8 +1235,7 @@ a client is created internally.</p>
</td>
<td>
<div class="doc-md-description">
<p>If the <code>Content-Type</code> header is missing or does not
indicate HTML content.</p>
<p>If the <code>Content-Type</code> header is missing or does not indicate HTML content.</p>
</div>
</td>
</tr>
@@ -1336,6 +1279,8 @@ indicate HTML content.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1373,7 +1318,7 @@ indicate HTML content.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -331,13 +248,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -346,8 +277,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -356,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -367,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -473,13 +390,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -488,8 +417,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -498,8 +428,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -509,27 +445,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -593,6 +508,18 @@
@@ -600,7 +527,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -610,8 +536,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -620,8 +547,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_4">
@@ -631,27 +564,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -720,6 +632,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.client-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient" class="md-nav__link">
<span class="md-ellipsis">
BasePDFClient
@@ -730,6 +661,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -741,6 +682,11 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
@@ -754,6 +700,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.FileSystemPDFClient-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.FileSystemPDFClient.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -770,6 +726,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -871,6 +837,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.client-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient" class="md-nav__link">
<span class="md-ellipsis">
BasePDFClient
@@ -881,6 +866,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.BasePDFClient.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -892,6 +887,11 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
@@ -905,6 +905,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.FileSystemPDFClient-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.client.FileSystemPDFClient.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -921,6 +931,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -947,7 +967,7 @@
<h2 id="omniread.pdf.client" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.pdf.client</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.client</span>
</h2>
@@ -955,6 +975,8 @@
<div class="doc doc-contents first">
<p>PDF client abstractions for OmniRead.</p>
<hr />
<h4 id="omniread.pdf.client--summary">Summary</h4>
<p>This module defines the <strong>client layer</strong> responsible for retrieving raw PDF
bytes from a concrete backing store.</p>
<p>Clients provide low-level access to PDF binaries and are intentionally
@@ -974,17 +996,17 @@ interpretation, or content extraction.</p>
<h3 id="omniread.pdf.client-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.pdf.client.BasePDFClient" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">BasePDFClient</span>
<h4 id="omniread.pdf.client.BasePDFClient" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BasePDFClient</span>
</h3>
</h4>
<div class="doc doc-contents ">
@@ -994,12 +1016,15 @@ interpretation, or content extraction.</p>
<p>Abstract client responsible for retrieving PDF bytes
from a specific backing store (filesystem, S3, FTP, etc.).</p>
<p>Implementations must:
- Accept a source identifier appropriate to the backing store
- Return the full PDF binary payload
- Raise retrieval-specific errors on failure</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must accept a source identifier appropriate to the backing store, return the full PDF binary payload, and raise retrieval-specific errors on failure
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
@@ -1010,22 +1035,22 @@ from a specific backing store (filesystem, S3, FTP, etc.).</p>
<h5 id="omniread.pdf.client.BasePDFClient-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.pdf.client.BasePDFClient.fetch" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">fetch</span>
<h6 id="omniread.pdf.client.BasePDFClient.fetch" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1050,8 +1075,7 @@ from a specific backing store (filesystem, S3, FTP, etc.).</p>
</td>
<td>
<div class="doc-md-description">
<p>Identifier of the PDF location, such as a file path,
object storage key, or remote reference.</p>
<p>Identifier of the PDF location, such as a file path, object storage key, or remote reference.</p>
</div>
</td>
<td>
@@ -1066,13 +1090,13 @@ object storage key, or remote reference.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>bytes</code></td> <td>
<code>bytes</code>
</td>
<td>
@@ -1123,11 +1147,11 @@ object storage key, or remote reference.</p>
<h3 id="omniread.pdf.client.FileSystemPDFClient" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">FileSystemPDFClient</span>
<h4 id="omniread.pdf.client.FileSystemPDFClient" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">FileSystemPDFClient</span>
</h3>
</h4>
<div class="doc doc-contents ">
@@ -1136,10 +1160,15 @@ object storage key, or remote reference.</p>
<p>PDF client that reads from the local filesystem.</p>
<p>This client reads PDF files directly from the disk and returns their raw
binary contents.</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Guarantees:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- This client reads PDF files directly from the disk and returns their raw binary contents
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
@@ -1150,18 +1179,18 @@ binary contents.</p>
<h5 id="omniread.pdf.client.FileSystemPDFClient-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.pdf.client.FileSystemPDFClient.fetch" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">fetch</span>
<h6 id="omniread.pdf.client.FileSystemPDFClient.fetch" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1201,13 +1230,13 @@ binary contents.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>bytes</code></td> <td>
<code>bytes</code>
</td>
<td>
@@ -1289,6 +1318,8 @@ binary contents.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1326,7 +1357,7 @@ binary contents.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

File diff suppressed because it is too large Load Diff

View File

@@ -86,7 +86,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -151,12 +153,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -182,96 +191,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -285,10 +204,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -331,13 +248,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -346,8 +277,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -356,8 +288,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -367,27 +305,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -473,13 +390,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -488,8 +417,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -498,8 +428,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -509,27 +445,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -593,6 +508,18 @@
@@ -600,7 +527,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -610,8 +536,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -620,8 +547,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_4">
@@ -631,27 +564,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -743,6 +655,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser" class="md-nav__link">
<span class="md-ellipsis">
PDFParser
@@ -753,6 +684,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -761,6 +702,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -787,6 +743,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -865,6 +831,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.parser-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser" class="md-nav__link">
<span class="md-ellipsis">
PDFParser
@@ -875,6 +860,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser-attributes" class="md-nav__link">
<span class="md-ellipsis">
Attributes
</span>
</a>
<nav class="md-nav" aria-label="Attributes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser.supported_types" class="md-nav__link">
<span class="md-ellipsis">
supported_types
@@ -883,6 +878,21 @@
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.parser.PDFParser.parse" class="md-nav__link">
<span class="md-ellipsis">
@@ -909,6 +919,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -935,7 +955,7 @@
<h2 id="omniread.pdf.parser" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.pdf.parser</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.parser</span>
</h2>
@@ -943,6 +963,8 @@
<div class="doc doc-contents first">
<p>PDF parser base implementations for OmniRead.</p>
<hr />
<h4 id="omniread.pdf.parser--summary">Summary</h4>
<p>This module defines the <strong>PDF-specific parser contract</strong>, extending the
format-agnostic <code>BaseParser</code> with constraints appropriate for PDF content.</p>
<p>PDF parsers are responsible for interpreting binary PDF data and producing
@@ -957,32 +979,37 @@ structured representations suitable for downstream consumption.</p>
<h3 id="omniread.pdf.parser-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.pdf.parser.PDFParser" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">PDFParser</span>
<h4 id="omniread.pdf.parser.PDFParser" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">PDFParser</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">PDFParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">PDFParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.pdf.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.pdf.parser.T">T</span>]</code></p>
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../omniread/core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.pdf.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.pdf.parser.T">T</span>]</code></p>
<p>Base PDF parser.</p>
<p>This class enforces PDF content-type compatibility and provides the
extension point for implementing concrete PDF parsing strategies.</p>
<p>Concrete implementations must define:
- Define the output type <code>T</code>
- Implement the <code>parse()</code> method</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- This class enforces PDF content-type compatibility and provides the extension point for implementing concrete PDF parsing strategies
</code></pre></div></td></tr></table></div>
<p><strong>Constraints:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Concrete implementations must: Define the output type `T`, implement the `parse()` method
</code></pre></div></td></tr></table></div>
</details>
<p>Initialize the parser with content to be parsed.</p>
@@ -1000,7 +1027,7 @@ extension point for implementing concrete PDF parsing strategies.</p>
<tr class="doc-section-item">
<td><code>content</code></td>
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
@@ -1046,14 +1073,14 @@ extension point for implementing concrete PDF parsing strategies.</p>
<h5 id="omniread.pdf.parser.PDFParser-attributes">Attributes</h5>
<div class="doc doc-object doc-attribute">
<h4 id="omniread.pdf.parser.PDFParser.supported_types" class="doc doc-heading">
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
<h6 id="omniread.pdf.parser.PDFParser.supported_types" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
<span class="doc doc-labels">
@@ -1061,9 +1088,9 @@ extension point for implementing concrete PDF parsing strategies.</p>
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">PDF</span><span class="p">}</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">PDF</span><span class="p">}</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1072,41 +1099,39 @@ extension point for implementing concrete PDF parsing strategies.</p>
</div>
<h5 id="omniread.pdf.parser.PDFParser-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.pdf.parser.PDFParser.parse" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">parse</span>
<h6 id="omniread.pdf.parser.PDFParser.parse" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="n">T</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p>Parse PDF content into a structured output.</p>
<p>Implementations must fully interpret the PDF binary payload and
return a deterministic, structured output.</p>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>T</code></td> <td>
<code><span title="omniread.pdf.parser.T">T</span></code>
</td>
<td>
@@ -1141,6 +1166,13 @@ return a deterministic, structured output.</p>
</tbody>
</table>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully interpret the PDF binary payload and return a deterministic, structured output
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
@@ -1148,13 +1180,13 @@ return a deterministic, structured output.</p>
<div class="doc doc-object doc-function">
<h4 id="omniread.pdf.parser.PDFParser.supports" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">supports</span>
<h6 id="omniread.pdf.parser.PDFParser.supports" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="nb">bool</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1165,13 +1197,13 @@ return a deterministic, structured output.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<td><code>bool</code></td> <td>
<code>bool</code>
</td>
<td>
@@ -1220,6 +1252,8 @@ return a deterministic, structured output.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1257,7 +1291,7 @@ return a deterministic, structured output.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

View File

@@ -84,7 +84,9 @@
<header class="md-header" data-md-component="header">
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -149,12 +151,19 @@
</label>
<nav class="md-search__options" aria-label="Search">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
@@ -180,96 +189,6 @@
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../core/" class="md-tabs__link">
Core API
</a>
</li>
<li class="md-tabs__item">
<a href="../../html/" class="md-tabs__link">
HTML Handling
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../" class="md-tabs__link">
PDF Handling
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
@@ -283,10 +202,8 @@
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
@@ -329,13 +246,27 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -344,8 +275,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../core/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -354,8 +286,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
@@ -365,27 +303,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../core/" class="md-nav__link">
<span class="md-ellipsis">
Core
</span>
</a>
</li>
@@ -471,13 +388,25 @@
<li class="md-nav__item md-nav__item--nested">
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
@@ -486,8 +415,9 @@
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<div class="md-nav__link md-nav__container">
<a href="../../html/" class="md-nav__link ">
<span class="md-ellipsis">
@@ -496,8 +426,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
@@ -507,27 +443,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../html/" class="md-nav__link">
<span class="md-ellipsis">
Html
</span>
</a>
</li>
@@ -591,6 +506,18 @@
@@ -598,7 +525,6 @@
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
@@ -608,8 +534,9 @@
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
@@ -618,8 +545,14 @@
</span>
<span class="md-nav__icon md-icon"></span>
</label>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_4">
@@ -629,27 +562,6 @@
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../" class="md-nav__link">
<span class="md-ellipsis">
Pdf
</span>
</a>
</li>
@@ -764,6 +676,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper" class="md-nav__link">
<span class="md-ellipsis">
PDFScraper
@@ -774,6 +705,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -790,6 +731,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -845,6 +796,25 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper--summary" class="md-nav__link">
<span class="md-ellipsis">
Summary
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#omniread.pdf.scraper-classes" class="md-nav__link">
<span class="md-ellipsis">
Classes
</span>
</a>
<nav class="md-nav" aria-label="Classes">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper" class="md-nav__link">
<span class="md-ellipsis">
PDFScraper
@@ -855,6 +825,16 @@
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper-functions" class="md-nav__link">
<span class="md-ellipsis">
Functions
</span>
</a>
<nav class="md-nav" aria-label="Functions">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#omniread.pdf.scraper.PDFScraper.fetch" class="md-nav__link">
<span class="md-ellipsis">
fetch
@@ -871,6 +851,16 @@
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
@@ -897,7 +887,7 @@
<h2 id="omniread.pdf.scraper" class="doc doc-heading">
<span class="doc doc-object-name doc-module-name">omniread.pdf.scraper</span>
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.scraper</span>
</h2>
@@ -905,6 +895,8 @@
<div class="doc doc-contents first">
<p>PDF scraping implementation for OmniRead.</p>
<hr />
<h4 id="omniread.pdf.scraper--summary">Summary</h4>
<p>This module provides a PDF-specific scraper that coordinates PDF byte
retrieval via a client and normalizes the result into a <code>Content</code> object.</p>
<p>The scraper implements the core <code>BaseScraper</code> contract while delegating
@@ -919,33 +911,39 @@ all storage and access concerns to a <code>BasePDFClient</code> implementation.<
<h3 id="omniread.pdf.scraper-classes">Classes</h3>
<div class="doc doc-object doc-class">
<h3 id="omniread.pdf.scraper.PDFScraper" class="doc doc-heading">
<span class="doc doc-object-name doc-class-name">PDFScraper</span>
<h4 id="omniread.pdf.scraper.PDFScraper" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">PDFScraper</span>
</h3>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">PDFScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">BasePDFClient</span><span class="p">)</span>
</code></pre></div>
</h4>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">PDFScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">BasePDFClient</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../omniread/core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
<p>Scraper for PDF sources.</p>
<p>Delegates byte retrieval to a PDF client and normalizes
output into Content.</p>
<p>The scraper:
- Does not perform parsing or interpretation
- Does not assume a specific storage backend
- Preserves caller-provided metadata</p>
<details class="notes" open>
<summary>Notes</summary>
<p><strong>Responsibilities:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- Delegates byte retrieval to a PDF client and normalizes output into Content
- Preserves caller-provided metadata
</code></pre></div></td></tr></table></div>
<p><strong>Constraints:</strong></p>
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- The scraper: Does not perform parsing or interpretation, does not assume a specific storage backend
</code></pre></div></td></tr></table></div>
</details>
<p>Initialize the PDF scraper.</p>
@@ -988,18 +986,18 @@ output into Content.</p>
<h5 id="omniread.pdf.scraper.PDFScraper-functions">Functions</h5>
<div class="doc doc-object doc-function">
<h4 id="omniread.pdf.scraper.PDFScraper.fetch" class="doc doc-heading">
<span class="doc doc-object-name doc-function-name">fetch</span>
<h6 id="omniread.pdf.scraper.PDFScraper.fetch" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
</h4>
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</code></pre></div>
</h6>
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Content</span>
</span></code></pre></div></td></tr></table></div>
<div class="doc doc-contents ">
@@ -1024,8 +1022,7 @@ output into Content.</p>
</td>
<td>
<div class="doc-md-description">
<p>Identifier of the PDF source as understood by the
configured PDF client.</p>
<p>Identifier of the PDF source as understood by the configured PDF client.</p>
</div>
</td>
<td>
@@ -1054,66 +1051,18 @@ configured PDF client.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
<td><code>Content</code></td> <td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A <code>Content</code> instance containing:</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Raw PDF bytes</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Source identifier</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>PDF content type</li>
</ul>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
</td>
<td>
<div class="doc-md-description">
<ul>
<li>Optional metadata</li>
</ul>
<p>A <code>Content</code> instance containing raw PDF bytes, source identifier, PDF content type, and optional metadata.</p>
</div>
</td>
</tr>
@@ -1180,6 +1129,8 @@ configured PDF client.</p>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
@@ -1217,7 +1168,7 @@ configured PDF client.</p>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>

File diff suppressed because one or more lines are too long

Binary file not shown.