This commit is contained in:
@@ -77,7 +77,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="/." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -142,12 +144,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -173,94 +182,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="/." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="/core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="/html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="/pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -274,10 +195,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="/." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -320,13 +239,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -335,8 +268,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="/core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -345,8 +279,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -356,27 +296,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="/core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -462,13 +381,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -477,8 +408,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="/html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -487,8 +419,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -498,27 +436,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="/html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -581,13 +498,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -596,8 +527,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="/pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -606,8 +538,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -617,27 +555,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="/pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -749,6 +666,8 @@
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -786,7 +705,7 @@
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "/", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "/assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "/", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "/assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="/assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -332,6 +249,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -339,7 +268,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -349,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -359,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -370,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -459,12 +373,88 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Content
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Content">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
content_type
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
metadata
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
raw
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.source" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
source
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -478,6 +468,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTML
|
||||
@@ -521,6 +521,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -593,13 +603,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -608,8 +630,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -618,8 +641,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -629,27 +658,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -712,13 +720,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -727,8 +749,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -737,8 +760,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -748,27 +777,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -883,12 +891,88 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Content
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Content">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
content_type
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
metadata
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
raw
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.source" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
source
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -902,6 +986,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTML
|
||||
@@ -945,6 +1039,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -971,7 +1075,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.core.content" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.core.content</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.content</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -979,6 +1083,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>Canonical content models for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.core.content--summary">Summary</h4>
|
||||
<p>This module defines the <strong>format-agnostic content representation</strong> used across
|
||||
all parsers and scrapers in OmniRead.</p>
|
||||
<p>The models defined here represent <em>what</em> was extracted, not <em>how</em> it was
|
||||
@@ -994,94 +1100,38 @@ the semantic meaning of these models.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content.Content" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">Content</span>
|
||||
<h4 id="omniread.core.content.Content" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">Content</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-dataclass"><code>dataclass</code></small>
|
||||
</span>
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
|
||||
<p>Normalized representation of extracted content.</p>
|
||||
<p>A <code>Content</code> instance represents a raw content payload along with minimal
|
||||
contextual metadata describing its origin and type.</p>
|
||||
<p>This class is the <strong>primary exchange format</strong> between:
|
||||
- Scrapers
|
||||
- Parsers
|
||||
- Downstream consumers</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Attributes:</span></p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.raw">raw</span></code></td>
|
||||
<td>
|
||||
<code>bytes</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Raw content bytes as retrieved from the source.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.source">source</span></code></td>
|
||||
<td>
|
||||
<code>str</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Identifier of the content origin (URL, file path, or logical name).</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.content_type">content_type</span></code></td>
|
||||
<td>
|
||||
<code><span title="typing.Optional">Optional</span>[<a class="autorefs autorefs-internal" title="omniread.core.content.ContentType" href="#omniread.core.content.ContentType">ContentType</a>]</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Optional MIME type of the content, if known.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.metadata">metadata</span></code></td>
|
||||
<td>
|
||||
<code><span title="typing.Optional">Optional</span>[<span title="typing.Mapping">Mapping</span>[str, <span title="typing.Any">Any</span>]]</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Optional, implementation-defined metadata associated with
|
||||
the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type
|
||||
- This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
@@ -1091,7 +1141,101 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.content.Content-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.content_type" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">content_type</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Optional MIME type of the content, if known.</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.metadata" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">metadata</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.raw" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">raw</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Raw content bytes as retrieved from the source.</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.source" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">source</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">source</span><span class="p">:</span> <span class="nb">str</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Identifier of the content origin (URL, file path, or logical name).</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
@@ -1107,11 +1251,11 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content.ContentType" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">ContentType</span>
|
||||
<h4 id="omniread.core.content.ContentType" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">ContentType</span>
|
||||
|
||||
|
||||
</h3>
|
||||
</h4>
|
||||
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
@@ -1120,11 +1264,17 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
<p>Supported MIME types for extracted content.</p>
|
||||
<p>This enum represents the declared or inferred media type of the content
|
||||
source. It is primarily used for routing content to the appropriate
|
||||
parser or downstream consumer.</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Guarantees:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This enum represents the declared or inferred media type of the content source
|
||||
- It is primarily used for routing content to the appropriate parser or downstream consumer
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<div class="doc doc-children">
|
||||
@@ -1133,14 +1283,14 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.content.ContentType-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">HTML</span>
|
||||
<h6 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">HTML</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1148,9 +1298,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">HTML</span> <span class="o">=</span> <span class="s1">'text/html'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">HTML</span> <span class="o">=</span> <span class="s1">'text/html'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1163,8 +1313,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">JSON</span>
|
||||
<h6 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">JSON</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1172,9 +1322,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">JSON</span> <span class="o">=</span> <span class="s1">'application/json'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">JSON</span> <span class="o">=</span> <span class="s1">'application/json'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1187,8 +1337,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">PDF</span>
|
||||
<h6 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">PDF</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1196,9 +1346,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">PDF</span> <span class="o">=</span> <span class="s1">'application/pdf'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">PDF</span> <span class="o">=</span> <span class="s1">'application/pdf'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1211,8 +1361,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">XML</span>
|
||||
<h6 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">XML</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1220,9 +1370,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">XML</span> <span class="o">=</span> <span class="s1">'application/xml'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">XML</span> <span class="o">=</span> <span class="s1">'application/xml'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1266,6 +1416,8 @@ parser or downstream consumer.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1303,7 +1455,7 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -332,6 +249,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -339,7 +268,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -349,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -359,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -370,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -482,6 +396,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BaseParser
|
||||
@@ -492,6 +425,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -500,6 +443,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -526,6 +484,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -575,13 +543,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -590,8 +570,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -600,8 +581,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -611,27 +598,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -694,13 +660,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -709,8 +689,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -719,8 +700,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -730,27 +717,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -865,6 +831,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BaseParser
|
||||
@@ -875,6 +860,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -883,6 +878,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.parser.BaseParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -909,6 +919,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -935,7 +955,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.core.parser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.core.parser</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.parser</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -943,6 +963,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>Abstract parsing contracts for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.core.parser--summary">Summary</h4>
|
||||
<p>This module defines the <strong>format-agnostic parser interface</strong> used to transform
|
||||
raw content into structured, typed representations.</p>
|
||||
<p>Parsers are responsible for:
|
||||
@@ -963,19 +985,19 @@ raw content into structured, typed representations.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.parser-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.parser.BaseParser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">BaseParser</span>
|
||||
<h4 id="omniread.core.parser.BaseParser" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BaseParser</span>
|
||||
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">BaseParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">BaseParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
<p class="doc doc-class-bases">
|
||||
@@ -983,16 +1005,23 @@ raw content into structured, typed representations.</p>
|
||||
|
||||
|
||||
<p>Base interface for all parsers.</p>
|
||||
<p>A parser is a self-contained object that owns the Content
|
||||
it is responsible for interpreting.</p>
|
||||
<p>Implementations must:
|
||||
- Declare supported content types via <code>supported_types</code>
|
||||
- Raise parsing-specific exceptions from <code>parse()</code>
|
||||
- Remain deterministic for a given input</p>
|
||||
<p>Consumers may rely on:
|
||||
- Early validation of content compatibility
|
||||
- Type-stable return values from <code>parse()</code></p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Guarantees:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- A parser is a self-contained object that owns the Content it is responsible for interpreting
|
||||
- Consumers may rely on early validation of content compatibility and type-stable return values from `parse()`
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span>
|
||||
<span class="normal">3</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must declare supported content types via `supported_types`
|
||||
- Implementations must raise parsing-specific exceptions from `parse()`
|
||||
- Implementations must remain deterministic for a given input
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
<p>Initialize the parser with content to be parsed.</p>
|
||||
|
||||
|
||||
@@ -1010,7 +1039,7 @@ it is responsible for interpreting.</p>
|
||||
<tr class="doc-section-item">
|
||||
<td><code>content</code></td>
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
@@ -1056,14 +1085,14 @@ it is responsible for interpreting.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.parser.BaseParser-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.parser.BaseParser.supported_types" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
<h6 id="omniread.core.parser.BaseParser.supported_types" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1071,53 +1100,50 @@ it is responsible for interpreting.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Set of content types supported by this parser.</p>
|
||||
<p>An empty set indicates that the parser is content-type agnostic.</p>
|
||||
<p>Set of content types supported by this parser. An empty set indicates that the parser is content-type agnostic.</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<h5 id="omniread.core.parser.BaseParser-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.core.parser.BaseParser.parse" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse</span>
|
||||
<h6 id="omniread.core.parser.BaseParser.parse" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Parse the owned content into structured output.</p>
|
||||
<p>Implementations must fully consume the provided content and
|
||||
return a deterministic, structured output.</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Returns:</span></p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>T</code></td> <td>
|
||||
<code><span title="omniread.core.parser.T">T</span></code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1152,6 +1178,13 @@ return a deterministic, structured output.</p>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully consume the provided content and return a deterministic, structured output
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -1159,13 +1192,13 @@ return a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.core.parser.BaseParser.supports" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">supports</span>
|
||||
<h6 id="omniread.core.parser.BaseParser.supports" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1176,13 +1209,13 @@ return a deterministic, structured output.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>bool</code></td> <td>
|
||||
<code>bool</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1231,6 +1264,8 @@ return a deterministic, structured output.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1268,7 +1303,7 @@ return a deterministic, structured output.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -332,6 +249,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -339,7 +268,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -349,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -359,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -370,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -505,6 +419,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BaseScraper
|
||||
@@ -515,6 +448,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -531,6 +474,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -557,13 +510,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -572,8 +537,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -582,8 +548,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -593,27 +565,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -676,13 +627,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -691,8 +656,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -701,8 +667,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -712,27 +684,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -847,6 +798,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BaseScraper
|
||||
@@ -857,6 +827,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.scraper.BaseScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -873,6 +853,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -899,7 +889,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.core.scraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.core.scraper</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.scraper</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -907,6 +897,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>Abstract scraping contracts for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.core.scraper--summary">Summary</h4>
|
||||
<p>This module defines the <strong>format-agnostic scraper interface</strong> responsible for
|
||||
acquiring raw content from external sources.</p>
|
||||
<p>Scrapers are responsible for:
|
||||
@@ -928,17 +920,17 @@ acquiring raw content from external sources.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.scraper-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.scraper.BaseScraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">BaseScraper</span>
|
||||
<h4 id="omniread.core.scraper.BaseScraper" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BaseScraper</span>
|
||||
|
||||
|
||||
</h3>
|
||||
</h4>
|
||||
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
@@ -947,21 +939,24 @@ acquiring raw content from external sources.</p>
|
||||
|
||||
|
||||
<p>Base interface for all scrapers.</p>
|
||||
<p>A scraper is responsible ONLY for fetching raw content
|
||||
(bytes) from a source. It must not interpret or parse it.</p>
|
||||
<p>A scraper is a <strong>stateless acquisition component</strong> that retrieves raw
|
||||
content from a source and returns it as a <code>Content</code> object.</p>
|
||||
<p>Scrapers define <em>how content is obtained</em>, not <em>what the content means</em>.</p>
|
||||
<p>Implementations may vary in:
|
||||
- Transport mechanism (HTTP, filesystem, cloud storage)
|
||||
- Authentication strategy
|
||||
- Retry and backoff behavior</p>
|
||||
<p>Implementations must not:
|
||||
- Parse content
|
||||
- Modify content semantics
|
||||
- Couple scraping logic to a specific parser</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span>
|
||||
<span class="normal">3</span>
|
||||
<span class="normal">4</span></pre></div></td><td class="code"><div><pre><span></span><code>- A scraper is responsible ONLY for fetching raw content (bytes) from a source. It must not interpret or parse it
|
||||
- A scraper is a stateless acquisition component that retrieves raw content from a source and returns it as a `Content` object
|
||||
- Scrapers define how content is obtained, not what the content means
|
||||
- Implementations may vary in transport mechanism, authentication strategy, retry and backoff behavior
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Constraints:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must not parse content, modify content semantics, or couple scraping logic to a specific parser
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<div class="doc doc-children">
|
||||
@@ -972,28 +967,26 @@ content from a source and returns it as a <code>Content</code> object.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.scraper.BaseScraper-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.core.scraper.BaseScraper.fetch" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
<h6 id="omniread.core.scraper.BaseScraper.fetch" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Fetch raw content from the given source.</p>
|
||||
<p>Implementations must retrieve the content referenced by <code>source</code>
|
||||
and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Parameters:</span></p>
|
||||
@@ -1043,14 +1036,14 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
|
||||
<td><code>Content</code></td> <td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
@@ -1058,42 +1051,6 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Raw content bytes</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Source identifier</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Optional metadata</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -1120,6 +1077,13 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must retrieve the content referenced by `source` and return it as raw bytes wrapped in a `Content` object
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -1157,6 +1121,8 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1194,7 +1160,7 @@ and return it as raw bytes wrapped in a <code>Content</code> object.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -331,13 +248,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -346,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -356,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -367,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -474,6 +391,16 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -481,7 +408,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -491,8 +417,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -501,8 +428,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -512,27 +445,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -601,6 +513,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTMLParser
|
||||
@@ -611,6 +542,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -619,6 +560,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -681,6 +637,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -730,13 +696,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -745,8 +725,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -755,8 +736,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -766,27 +753,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -901,6 +867,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTMLParser
|
||||
@@ -911,6 +896,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -919,6 +914,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.parser.HTMLParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -981,6 +991,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -1007,7 +1027,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.html.parser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.html.parser</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.html.parser</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -1015,6 +1035,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>HTML parser base implementations for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.html.parser--summary">Summary</h4>
|
||||
<p>This module provides reusable HTML parsing utilities built on top of
|
||||
the abstract parser contracts defined in <code>omniread.core.parser</code>.</p>
|
||||
<p>It supplies:
|
||||
@@ -1033,38 +1055,42 @@ to return a structured representation appropriate for their use case.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.html.parser-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.html.parser.HTMLParser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">HTMLParser</span>
|
||||
<h4 id="omniread.html.parser.HTMLParser" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HTMLParser</span>
|
||||
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">HTMLParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'html.parser'</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">HTMLParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'html.parser'</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
<p class="doc doc-class-bases">
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.html.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.html.parser.T">T</span>]</code></p>
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../omniread/core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.html.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.html.parser.T">T</span>]</code></p>
|
||||
|
||||
|
||||
<p>Base HTML parser.</p>
|
||||
<p>This class extends the core <code>BaseParser</code> with HTML-specific behavior,
|
||||
including DOM parsing via BeautifulSoup and reusable extraction helpers.</p>
|
||||
<p>Provides reusable helpers for HTML extraction.
|
||||
Concrete parsers must explicitly define the return type.</p>
|
||||
<p>Characteristics:
|
||||
- Accepts only HTML content
|
||||
- Owns a parsed BeautifulSoup DOM tree
|
||||
- Provides pure helper utilities for common HTML structures</p>
|
||||
<p>Concrete subclasses must:
|
||||
- Define the output type <code>T</code>
|
||||
- Implement the <code>parse()</code> method</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This class extends the core `BaseParser` with HTML-specific behavior, including DOM parsing via BeautifulSoup and reusable extraction helpers
|
||||
- Provides reusable helpers for HTML extraction. Concrete parsers must explicitly define the return type
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Guarantees:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Characteristics: Accepts only HTML content, owns a parsed BeautifulSoup DOM tree, provides pure helper utilities for common HTML structures
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Constraints:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Concrete subclasses must define the output type `T` and implement the `parse()` method
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
<p>Initialize the HTML parser.</p>
|
||||
|
||||
|
||||
@@ -1082,7 +1108,7 @@ Concrete parsers must explicitly define the return type.</p>
|
||||
<tr class="doc-section-item">
|
||||
<td><code>content</code></td>
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
@@ -1100,8 +1126,7 @@ Concrete parsers must explicitly define the return type.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>BeautifulSoup parser backend to use
|
||||
(e.g., 'html.parser', 'lxml').</p>
|
||||
<p>BeautifulSoup parser backend to use (e.g., 'html.parser', 'lxml').</p>
|
||||
</div>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1143,14 +1168,14 @@ Concrete parsers must explicitly define the return type.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.html.parser.HTMLParser-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.supported_types" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.supported_types" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1158,9 +1183,9 @@ Concrete parsers must explicitly define the return type.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">HTML</span><span class="p">}</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">HTML</span><span class="p">}</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1169,41 +1194,39 @@ Concrete parsers must explicitly define the return type.</p>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<h5 id="omniread.html.parser.HTMLParser-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.parse" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.parse" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Fully parse the HTML content into structured output.</p>
|
||||
<p>Implementations must fully interpret the HTML DOM and return
|
||||
a deterministic, structured output.</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Returns:</span></p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>T</code></td> <td>
|
||||
<code><span title="omniread.html.parser.T">T</span></code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1215,6 +1238,13 @@ a deterministic, structured output.</p>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully interpret the HTML DOM and return a deterministic, structured output
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -1222,17 +1252,17 @@ a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.parse_div" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse_div</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.parse_div" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_div</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_div</span><span class="p">(</span><span class="n">div</span><span class="p">:</span> <span class="n">Tag</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">separator</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">' '</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_div</span><span class="p">(</span><span class="n">div</span><span class="p">:</span> <span class="n">Tag</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">separator</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">' '</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1286,13 +1316,13 @@ a deterministic, structured output.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>str</code></td> <td>
|
||||
<code>str</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1311,17 +1341,17 @@ a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.parse_link" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse_link</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.parse_link" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_link</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_link</span><span class="p">(</span><span class="n">a</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_link</span><span class="p">(</span><span class="n">a</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1372,7 +1402,8 @@ a deterministic, structured output.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>The value of the <code>href</code> attribute, or None if absent.</p>
|
||||
<p>Optional[str]:
|
||||
The value of the <code>href</code> attribute, or None if absent.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -1386,20 +1417,17 @@ a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.parse_meta" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse_meta</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.parse_meta" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_meta</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_meta</span><span class="p">()</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_meta</span><span class="p">()</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Extract high-level metadata from the HTML document.</p>
|
||||
<p>This includes:
|
||||
- Document title
|
||||
- <code><meta></code> tag name/property → content mappings</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Returns:</span></p>
|
||||
@@ -1417,13 +1445,23 @@ a deterministic, structured output.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Dictionary containing extracted metadata.</p>
|
||||
<p>dict[str, Any]:
|
||||
Dictionary containing extracted metadata.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- Extract high-level metadata from the HTML document
|
||||
- This includes: Document title, `<meta>` tag name/property → content mappings
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -1431,17 +1469,17 @@ a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.parse_table" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse_table</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.parse_table" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse_table</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse_table</span><span class="p">(</span><span class="n">table</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse_table</span><span class="p">(</span><span class="n">table</span><span class="p">:</span> <span class="n">Tag</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1492,7 +1530,8 @@ a deterministic, structured output.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>A list of rows, where each row is a list of cell text values.</p>
|
||||
<p>list[list[str]]:
|
||||
A list of rows, where each row is a list of cell text values.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -1506,13 +1545,13 @@ a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.parser.HTMLParser.supports" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">supports</span>
|
||||
<h6 id="omniread.html.parser.HTMLParser.supports" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1523,13 +1562,13 @@ a deterministic, structured output.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>bool</code></td> <td>
|
||||
<code>bool</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1578,6 +1617,8 @@ a deterministic, structured output.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1615,7 +1656,7 @@ a deterministic, structured output.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -331,13 +248,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -346,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -356,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -367,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -474,6 +391,16 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -481,7 +408,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -491,8 +417,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -501,8 +428,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -512,27 +445,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -624,6 +536,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTMLScraper
|
||||
@@ -634,6 +565,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -659,6 +600,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -685,13 +636,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -700,8 +665,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -710,8 +676,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -721,27 +693,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -856,6 +807,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTMLScraper
|
||||
@@ -866,6 +836,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.html.scraper.HTMLScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -891,6 +871,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -917,7 +907,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.html.scraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.html.scraper</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.html.scraper</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -925,6 +915,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>HTML scraping implementation for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.html.scraper--summary">Summary</h4>
|
||||
<p>This module provides an HTTP-based scraper for retrieving HTML documents.
|
||||
It implements the core <code>BaseScraper</code> contract using <code>httpx</code> as the transport
|
||||
layer.</p>
|
||||
@@ -946,38 +938,39 @@ layer.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.html.scraper-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.html.scraper.HTMLScraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">HTMLScraper</span>
|
||||
<h4 id="omniread.html.scraper.HTMLScraper" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HTMLScraper</span>
|
||||
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">HTMLScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">httpx</span><span class="o">.</span><span class="n">Client</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">15.0</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">follow_redirects</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">HTMLScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">httpx</span><span class="o">.</span><span class="n">Client</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">15.0</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">follow_redirects</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
<p class="doc doc-class-bases">
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../omniread/core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
|
||||
|
||||
|
||||
<p>Base HTML scraper using httpx.</p>
|
||||
<p>This scraper retrieves HTML documents over HTTP(S) and returns them
|
||||
as raw content wrapped in a <code>Content</code> object.</p>
|
||||
<p>Fetches raw bytes and metadata only.
|
||||
The scraper:
|
||||
- Uses <code>httpx.Client</code> for HTTP requests
|
||||
- Enforces an HTML content type
|
||||
- Preserves HTTP response metadata</p>
|
||||
<p>The scraper does not:
|
||||
- Parse HTML
|
||||
- Perform retries or backoff
|
||||
- Handle non-HTML responses</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This scraper retrieves HTML documents over HTTP(S) and returns them as raw content wrapped in a `Content` object
|
||||
- Fetches raw bytes and metadata only. The scraper uses `httpx.Client` for HTTP requests, enforces an HTML content type, preserves HTTP response metadata
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Constraints:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- The scraper does not: Parse HTML, perform retries or backoff, handle non-HTML responses
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
<p>Initialize the HTML scraper.</p>
|
||||
|
||||
|
||||
@@ -995,12 +988,11 @@ The scraper:
|
||||
<tr class="doc-section-item">
|
||||
<td><code>client</code></td>
|
||||
<td>
|
||||
<code><span title="typing.Optional">Optional</span>[<span title="httpx.Client">Client</span>]</code>
|
||||
<code><span title="httpx.Client">Client</span> | None</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Optional pre-configured <code>httpx.Client</code>. If omitted,
|
||||
a client is created internally.</p>
|
||||
<p>Optional pre-configured <code>httpx.Client</code>. If omitted, a client is created internally.</p>
|
||||
</div>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1063,18 +1055,18 @@ a client is created internally.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.html.scraper.HTMLScraper-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.scraper.HTMLScraper.fetch" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
<h6 id="omniread.html.scraper.HTMLScraper.fetch" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1128,66 +1120,18 @@ a client is created internally.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
<td><code>Content</code></td> <td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>A <code>Content</code> instance containing:</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Raw HTML bytes</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Source URL</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>HTML content type</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>HTTP response metadata</li>
|
||||
</ul>
|
||||
<p>A <code>Content</code> instance containing raw HTML bytes, source URL, HTML content type, and HTTP response metadata.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -1234,13 +1178,13 @@ a client is created internally.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.html.scraper.HTMLScraper.validate_content_type" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">validate_content_type</span>
|
||||
<h6 id="omniread.html.scraper.HTMLScraper.validate_content_type" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">validate_content_type</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">validate_content_type</span><span class="p">(</span><span class="n">response</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">validate_content_type</span><span class="p">(</span><span class="n">response</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1291,8 +1235,7 @@ a client is created internally.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>If the <code>Content-Type</code> header is missing or does not
|
||||
indicate HTML content.</p>
|
||||
<p>If the <code>Content-Type</code> header is missing or does not indicate HTML content.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -1336,6 +1279,8 @@ indicate HTML content.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1373,7 +1318,7 @@ indicate HTML content.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
1248
libs/omniread/site/omniread/core/content/index.html
Normal file
1248
libs/omniread/site/omniread/core/content/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1810
libs/omniread/site/omniread/core/index.html
Normal file
1810
libs/omniread/site/omniread/core/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1156
libs/omniread/site/omniread/core/parser/index.html
Normal file
1156
libs/omniread/site/omniread/core/parser/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1046
libs/omniread/site/omniread/core/scraper/index.html
Normal file
1046
libs/omniread/site/omniread/core/scraper/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1857
libs/omniread/site/omniread/html/index.html
Normal file
1857
libs/omniread/site/omniread/html/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1473
libs/omniread/site/omniread/html/parser/index.html
Normal file
1473
libs/omniread/site/omniread/html/parser/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1195
libs/omniread/site/omniread/html/scraper/index.html
Normal file
1195
libs/omniread/site/omniread/html/scraper/index.html
Normal file
File diff suppressed because it is too large
Load Diff
3113
libs/omniread/site/omniread/index.html
Normal file
3113
libs/omniread/site/omniread/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1204
libs/omniread/site/omniread/pdf/client/index.html
Normal file
1204
libs/omniread/site/omniread/pdf/client/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1590
libs/omniread/site/omniread/pdf/index.html
Normal file
1590
libs/omniread/site/omniread/pdf/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1144
libs/omniread/site/omniread/pdf/parser/index.html
Normal file
1144
libs/omniread/site/omniread/pdf/parser/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1056
libs/omniread/site/omniread/pdf/scraper/index.html
Normal file
1056
libs/omniread/site/omniread/pdf/scraper/index.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -331,13 +248,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -346,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -356,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -367,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -473,13 +390,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -488,8 +417,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -498,8 +428,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -509,27 +445,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -593,6 +508,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -600,7 +527,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -610,8 +536,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -620,8 +547,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -631,27 +564,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -720,6 +632,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BasePDFClient
|
||||
@@ -730,6 +661,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -741,6 +682,11 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -754,6 +700,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.FileSystemPDFClient-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.FileSystemPDFClient.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -770,6 +726,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -871,6 +837,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
BasePDFClient
|
||||
@@ -881,6 +866,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.BasePDFClient.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -892,6 +887,11 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -905,6 +905,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.FileSystemPDFClient-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.client.FileSystemPDFClient.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -921,6 +931,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -947,7 +967,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.pdf.client" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.pdf.client</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.client</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -955,6 +975,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>PDF client abstractions for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.pdf.client--summary">Summary</h4>
|
||||
<p>This module defines the <strong>client layer</strong> responsible for retrieving raw PDF
|
||||
bytes from a concrete backing store.</p>
|
||||
<p>Clients provide low-level access to PDF binaries and are intentionally
|
||||
@@ -974,17 +996,17 @@ interpretation, or content extraction.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.client-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.client.BasePDFClient" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">BasePDFClient</span>
|
||||
<h4 id="omniread.pdf.client.BasePDFClient" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">BasePDFClient</span>
|
||||
|
||||
|
||||
</h3>
|
||||
</h4>
|
||||
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
@@ -994,12 +1016,15 @@ interpretation, or content extraction.</p>
|
||||
|
||||
<p>Abstract client responsible for retrieving PDF bytes
|
||||
from a specific backing store (filesystem, S3, FTP, etc.).</p>
|
||||
<p>Implementations must:
|
||||
- Accept a source identifier appropriate to the backing store
|
||||
- Return the full PDF binary payload
|
||||
- Raise retrieval-specific errors on failure</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must accept a source identifier appropriate to the backing store, return the full PDF binary payload, and raise retrieval-specific errors on failure
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<div class="doc doc-children">
|
||||
@@ -1010,22 +1035,22 @@ from a specific backing store (filesystem, S3, FTP, etc.).</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.pdf.client.BasePDFClient-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.client.BasePDFClient.fetch" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
<h6 id="omniread.pdf.client.BasePDFClient.fetch" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bytes</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bytes</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1050,8 +1075,7 @@ from a specific backing store (filesystem, S3, FTP, etc.).</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Identifier of the PDF location, such as a file path,
|
||||
object storage key, or remote reference.</p>
|
||||
<p>Identifier of the PDF location, such as a file path, object storage key, or remote reference.</p>
|
||||
</div>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1066,13 +1090,13 @@ object storage key, or remote reference.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>bytes</code></td> <td>
|
||||
<code>bytes</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1123,11 +1147,11 @@ object storage key, or remote reference.</p>
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.client.FileSystemPDFClient" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">FileSystemPDFClient</span>
|
||||
<h4 id="omniread.pdf.client.FileSystemPDFClient" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">FileSystemPDFClient</span>
|
||||
|
||||
|
||||
</h3>
|
||||
</h4>
|
||||
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
@@ -1136,10 +1160,15 @@ object storage key, or remote reference.</p>
|
||||
|
||||
|
||||
<p>PDF client that reads from the local filesystem.</p>
|
||||
<p>This client reads PDF files directly from the disk and returns their raw
|
||||
binary contents.</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Guarantees:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- This client reads PDF files directly from the disk and returns their raw binary contents
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<div class="doc doc-children">
|
||||
@@ -1150,18 +1179,18 @@ binary contents.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.pdf.client.FileSystemPDFClient-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.client.FileSystemPDFClient.fetch" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
<h6 id="omniread.pdf.client.FileSystemPDFClient.fetch" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bytes</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bytes</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1201,13 +1230,13 @@ binary contents.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>bytes</code></td> <td>
|
||||
<code>bytes</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1289,6 +1318,8 @@ binary contents.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1326,7 +1357,7 @@ binary contents.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -331,13 +248,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -346,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -356,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -367,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -473,13 +390,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -488,8 +417,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -498,8 +428,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -509,27 +445,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -593,6 +508,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -600,7 +527,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -610,8 +536,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -620,8 +547,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -631,27 +564,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -743,6 +655,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
PDFParser
|
||||
@@ -753,6 +684,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -761,6 +702,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -787,6 +743,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -865,6 +831,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
PDFParser
|
||||
@@ -875,6 +860,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser.supported_types" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
supported_types
|
||||
@@ -883,6 +878,21 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.parser.PDFParser.parse" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
@@ -909,6 +919,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -935,7 +955,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.pdf.parser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.pdf.parser</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.parser</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -943,6 +963,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>PDF parser base implementations for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.pdf.parser--summary">Summary</h4>
|
||||
<p>This module defines the <strong>PDF-specific parser contract</strong>, extending the
|
||||
format-agnostic <code>BaseParser</code> with constraints appropriate for PDF content.</p>
|
||||
<p>PDF parsers are responsible for interpreting binary PDF data and producing
|
||||
@@ -957,32 +979,37 @@ structured representations suitable for downstream consumption.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.parser-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.parser.PDFParser" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">PDFParser</span>
|
||||
<h4 id="omniread.pdf.parser.PDFParser" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">PDFParser</span>
|
||||
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">PDFParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">PDFParser</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="n">Content</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
<p class="doc doc-class-bases">
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.pdf.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.pdf.parser.T">T</span>]</code></p>
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.parser.BaseParser" href="../../omniread/core/parser/#omniread.core.parser.BaseParser">BaseParser</a>[<span title="omniread.pdf.parser.T">T</span>]</code>, <code><span title="typing.Generic">Generic</span>[<span title="omniread.pdf.parser.T">T</span>]</code></p>
|
||||
|
||||
|
||||
<p>Base PDF parser.</p>
|
||||
<p>This class enforces PDF content-type compatibility and provides the
|
||||
extension point for implementing concrete PDF parsing strategies.</p>
|
||||
<p>Concrete implementations must define:
|
||||
- Define the output type <code>T</code>
|
||||
- Implement the <code>parse()</code> method</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- This class enforces PDF content-type compatibility and provides the extension point for implementing concrete PDF parsing strategies
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Constraints:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Concrete implementations must: Define the output type `T`, implement the `parse()` method
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
<p>Initialize the parser with content to be parsed.</p>
|
||||
|
||||
|
||||
@@ -1000,7 +1027,7 @@ extension point for implementing concrete PDF parsing strategies.</p>
|
||||
<tr class="doc-section-item">
|
||||
<td><code>content</code></td>
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
@@ -1046,14 +1073,14 @@ extension point for implementing concrete PDF parsing strategies.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.pdf.parser.PDFParser-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.parser.PDFParser.supported_types" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
<h6 id="omniread.pdf.parser.PDFParser.supported_types" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">supported_types</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1061,9 +1088,9 @@ extension point for implementing concrete PDF parsing strategies.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">PDF</span><span class="p">}</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">supported_types</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">PDF</span><span class="p">}</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1072,41 +1099,39 @@ extension point for implementing concrete PDF parsing strategies.</p>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<h5 id="omniread.pdf.parser.PDFParser-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.parser.PDFParser.parse" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">parse</span>
|
||||
<h6 id="omniread.pdf.parser.PDFParser.parse" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">parse</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">parse</span><span class="p">()</span> <span class="o">-></span> <span class="n">T</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Parse PDF content into a structured output.</p>
|
||||
<p>Implementations must fully interpret the PDF binary payload and
|
||||
return a deterministic, structured output.</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Returns:</span></p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>T</code></td> <td>
|
||||
<code><span title="omniread.pdf.parser.T">T</span></code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1141,6 +1166,13 @@ return a deterministic, structured output.</p>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- Implementations must fully interpret the PDF binary payload and return a deterministic, structured output
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -1148,13 +1180,13 @@ return a deterministic, structured output.</p>
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.parser.PDFParser.supports" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">supports</span>
|
||||
<h6 id="omniread.pdf.parser.PDFParser.supports" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">supports</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">supports</span><span class="p">()</span> <span class="o">-></span> <span class="nb">bool</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1165,13 +1197,13 @@ return a deterministic, structured output.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<td><code>bool</code></td> <td>
|
||||
<code>bool</code>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1220,6 +1252,8 @@ return a deterministic, structured output.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1257,7 +1291,7 @@ return a deterministic, structured output.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
@@ -84,7 +84,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -149,12 +151,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -180,96 +189,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../core/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -283,10 +202,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -329,13 +246,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -344,8 +275,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../core/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -354,8 +286,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -365,27 +303,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../core/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -471,13 +388,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -486,8 +415,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -496,8 +426,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -507,27 +443,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -591,6 +506,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -598,7 +525,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -608,8 +534,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -618,8 +545,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -629,27 +562,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -764,6 +676,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
PDFScraper
|
||||
@@ -774,6 +705,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -790,6 +731,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -845,6 +796,25 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
PDFScraper
|
||||
@@ -855,6 +825,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper-functions" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Functions
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Functions">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.pdf.scraper.PDFScraper.fetch" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
fetch
|
||||
@@ -871,6 +851,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -897,7 +887,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.pdf.scraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.pdf.scraper</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.pdf.scraper</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -905,6 +895,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>PDF scraping implementation for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.pdf.scraper--summary">Summary</h4>
|
||||
<p>This module provides a PDF-specific scraper that coordinates PDF byte
|
||||
retrieval via a client and normalizes the result into a <code>Content</code> object.</p>
|
||||
<p>The scraper implements the core <code>BaseScraper</code> contract while delegating
|
||||
@@ -919,33 +911,39 @@ all storage and access concerns to a <code>BasePDFClient</code> implementation.<
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.scraper-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.pdf.scraper.PDFScraper" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">PDFScraper</span>
|
||||
<h4 id="omniread.pdf.scraper.PDFScraper" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">PDFScraper</span>
|
||||
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">PDFScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">BasePDFClient</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">PDFScraper</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">BasePDFClient</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
<p class="doc doc-class-bases">
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
|
||||
Bases: <code><a class="autorefs autorefs-internal" title="omniread.core.scraper.BaseScraper" href="../../omniread/core/scraper/#omniread.core.scraper.BaseScraper">BaseScraper</a></code></p>
|
||||
|
||||
|
||||
<p>Scraper for PDF sources.</p>
|
||||
<p>Delegates byte retrieval to a PDF client and normalizes
|
||||
output into Content.</p>
|
||||
<p>The scraper:
|
||||
- Does not perform parsing or interpretation
|
||||
- Does not assume a specific storage backend
|
||||
- Preserves caller-provided metadata</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- Delegates byte retrieval to a PDF client and normalizes output into Content
|
||||
- Preserves caller-provided metadata
|
||||
</code></pre></div></td></tr></table></div>
|
||||
<p><strong>Constraints:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>- The scraper: Does not perform parsing or interpretation, does not assume a specific storage backend
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
<p>Initialize the PDF scraper.</p>
|
||||
|
||||
|
||||
@@ -988,18 +986,18 @@ output into Content.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.pdf.scraper.PDFScraper-functions">Functions</h5>
|
||||
|
||||
<div class="doc doc-object doc-function">
|
||||
|
||||
|
||||
<h4 id="omniread.pdf.scraper.PDFScraper.fetch" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
<h6 id="omniread.pdf.scraper.PDFScraper.fetch" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">fetch</span>
|
||||
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">fetch</span><span class="p">(</span><span class="n">source</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Content</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1024,8 +1022,7 @@ output into Content.</p>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Identifier of the PDF source as understood by the
|
||||
configured PDF client.</p>
|
||||
<p>Identifier of the PDF source as understood by the configured PDF client.</p>
|
||||
</div>
|
||||
</td>
|
||||
<td>
|
||||
@@ -1054,66 +1051,18 @@ configured PDF client.</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Name</th> <th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
<td><code>Content</code></td> <td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../omniread/core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>A <code>Content</code> instance containing:</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Raw PDF bytes</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Source identifier</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>PDF content type</li>
|
||||
</ul>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td>
|
||||
<code><a class="autorefs autorefs-internal" title="omniread.core.content.Content" href="../../core/content/#omniread.core.content.Content">Content</a></code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<ul>
|
||||
<li>Optional metadata</li>
|
||||
</ul>
|
||||
<p>A <code>Content</code> instance containing raw PDF bytes, source identifier, PDF content type, and optional metadata.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -1180,6 +1129,8 @@ configured PDF client.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1217,7 +1168,7 @@ configured PDF client.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
Reference in New Issue
Block a user