This commit is contained in:
@@ -86,7 +86,9 @@
|
||||
|
||||
|
||||
|
||||
<header class="md-header" data-md-component="header">
|
||||
|
||||
|
||||
<header class="md-header md-header--shadow" data-md-component="header">
|
||||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||||
<a href="../.." title="omniread" class="md-header__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -151,12 +153,19 @@
|
||||
</label>
|
||||
<nav class="md-search__options" aria-label="Search">
|
||||
|
||||
<a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
|
||||
</a>
|
||||
|
||||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||||
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
||||
|
||||
</form>
|
||||
<div class="md-search__output">
|
||||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||
@@ -182,96 +191,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
||||
<div class="md-grid">
|
||||
<ul class="md-tabs__list">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../.." class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Home
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item md-tabs__item--active">
|
||||
<a href="../" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
Core API
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../html/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
HTML Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-tabs__item">
|
||||
<a href="../../pdf/" class="md-tabs__link">
|
||||
|
||||
|
||||
|
||||
PDF Handling
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
<main class="md-main" data-md-component="main">
|
||||
@@ -285,10 +204,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
||||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||
<label class="md-nav__title" for="__drawer">
|
||||
<a href="../.." title="omniread" class="md-nav__button md-logo" aria-label="omniread" data-md-component="logo">
|
||||
|
||||
@@ -332,6 +249,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -339,7 +268,6 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||
@@ -349,8 +277,9 @@
|
||||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -359,8 +288,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
||||
<label class="md-nav__title" for="__nav_2">
|
||||
@@ -370,27 +305,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Core
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -459,12 +373,88 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Content
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Content">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
content_type
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
metadata
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
raw
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.source" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
source
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -478,6 +468,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTML
|
||||
@@ -521,6 +521,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -593,13 +603,25 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -608,8 +630,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../html/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -618,8 +641,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_3">
|
||||
@@ -629,27 +658,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../html/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Html
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -712,13 +720,27 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||
|
||||
|
||||
|
||||
@@ -727,8 +749,9 @@
|
||||
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
|
||||
|
||||
|
||||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||||
|
||||
<div class="md-nav__link md-nav__container">
|
||||
<a href="../../pdf/" class="md-nav__link ">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
@@ -737,8 +760,14 @@
|
||||
</span>
|
||||
|
||||
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
</a>
|
||||
|
||||
|
||||
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="">
|
||||
<span class="md-nav__icon md-icon"></span>
|
||||
</label>
|
||||
|
||||
</div>
|
||||
|
||||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||||
<label class="md-nav__title" for="__nav_4">
|
||||
@@ -748,27 +777,6 @@
|
||||
<ul class="md-nav__list" data-md-scrollfix>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="../../pdf/" class="md-nav__link">
|
||||
|
||||
|
||||
|
||||
<span class="md-ellipsis">
|
||||
Pdf
|
||||
|
||||
</span>
|
||||
|
||||
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -883,12 +891,88 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content--summary" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Summary
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content-classes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Classes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Classes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Content
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Content">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.content_type" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
content_type
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.metadata" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
metadata
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.raw" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
raw
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.Content.source" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
source
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -902,6 +986,16 @@
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType-attributes" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Attributes
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Attributes">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#omniread.core.content.ContentType.HTML" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
HTML
|
||||
@@ -945,6 +1039,16 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -971,7 +1075,7 @@
|
||||
|
||||
|
||||
<h2 id="omniread.core.content" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-module-name">omniread.core.content</span>
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-module"></code> <span class="doc doc-object-name doc-module-name">omniread.core.content</span>
|
||||
|
||||
|
||||
</h2>
|
||||
@@ -979,6 +1083,8 @@
|
||||
<div class="doc doc-contents first">
|
||||
|
||||
<p>Canonical content models for OmniRead.</p>
|
||||
<hr />
|
||||
<h4 id="omniread.core.content--summary">Summary</h4>
|
||||
<p>This module defines the <strong>format-agnostic content representation</strong> used across
|
||||
all parsers and scrapers in OmniRead.</p>
|
||||
<p>The models defined here represent <em>what</em> was extracted, not <em>how</em> it was
|
||||
@@ -994,94 +1100,38 @@ the semantic meaning of these models.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content-classes">Classes</h3>
|
||||
|
||||
<div class="doc doc-object doc-class">
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content.Content" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">Content</span>
|
||||
<h4 id="omniread.core.content.Content" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">Content</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-dataclass"><code>dataclass</code></small>
|
||||
</span>
|
||||
|
||||
</h3>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
|
||||
</code></pre></div>
|
||||
</h4>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="nf">Content</span><span class="p">(</span><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
|
||||
<p>Normalized representation of extracted content.</p>
|
||||
<p>A <code>Content</code> instance represents a raw content payload along with minimal
|
||||
contextual metadata describing its origin and type.</p>
|
||||
<p>This class is the <strong>primary exchange format</strong> between:
|
||||
- Scrapers
|
||||
- Parsers
|
||||
- Downstream consumers</p>
|
||||
|
||||
|
||||
<p><span class="doc-section-title">Attributes:</span></p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.raw">raw</span></code></td>
|
||||
<td>
|
||||
<code>bytes</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Raw content bytes as retrieved from the source.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.source">source</span></code></td>
|
||||
<td>
|
||||
<code>str</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Identifier of the content origin (URL, file path, or logical name).</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.content_type">content_type</span></code></td>
|
||||
<td>
|
||||
<code><span title="typing.Optional">Optional</span>[<a class="autorefs autorefs-internal" title="omniread.core.content.ContentType" href="#omniread.core.content.ContentType">ContentType</a>]</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Optional MIME type of the content, if known.</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="doc-section-item">
|
||||
<td><code><span title="omniread.core.content.Content.metadata">metadata</span></code></td>
|
||||
<td>
|
||||
<code><span title="typing.Optional">Optional</span>[<span title="typing.Mapping">Mapping</span>[str, <span title="typing.Any">Any</span>]]</code>
|
||||
</td>
|
||||
<td>
|
||||
<div class="doc-md-description">
|
||||
<p>Optional, implementation-defined metadata associated with
|
||||
the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Responsibilities:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type
|
||||
- This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
@@ -1091,7 +1141,101 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.content.Content-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.content_type" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">content_type</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">content_type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ContentType</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Optional MIME type of the content, if known.</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.metadata" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">metadata</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">metadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.raw" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">raw</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">raw</span><span class="p">:</span> <span class="nb">bytes</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Raw content bytes as retrieved from the source.</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h6 id="omniread.core.content.Content.source" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">source</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">source</span><span class="p">:</span> <span class="nb">str</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
<p>Identifier of the content origin (URL, file path, or logical name).</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
@@ -1107,11 +1251,11 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
|
||||
<h3 id="omniread.core.content.ContentType" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-class-name">ContentType</span>
|
||||
<h4 id="omniread.core.content.ContentType" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">ContentType</span>
|
||||
|
||||
|
||||
</h3>
|
||||
</h4>
|
||||
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
@@ -1120,11 +1264,17 @@ the content (e.g., headers, encoding hints, extraction notes).</p>
|
||||
|
||||
|
||||
<p>Supported MIME types for extracted content.</p>
|
||||
<p>This enum represents the declared or inferred media type of the content
|
||||
source. It is primarily used for routing content to the appropriate
|
||||
parser or downstream consumer.</p>
|
||||
|
||||
|
||||
<details class="notes" open>
|
||||
<summary>Notes</summary>
|
||||
<p><strong>Guarantees:</strong></p>
|
||||
<div class="language-text highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
|
||||
<span class="normal">2</span></pre></div></td><td class="code"><div><pre><span></span><code>- This enum represents the declared or inferred media type of the content source
|
||||
- It is primarily used for routing content to the appropriate parser or downstream consumer
|
||||
</code></pre></div></td></tr></table></div>
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<div class="doc doc-children">
|
||||
@@ -1133,14 +1283,14 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<h5 id="omniread.core.content.ContentType-attributes">Attributes</h5>
|
||||
|
||||
<div class="doc doc-object doc-attribute">
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">HTML</span>
|
||||
<h6 id="omniread.core.content.ContentType.HTML" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">HTML</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1148,9 +1298,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">HTML</span> <span class="o">=</span> <span class="s1">'text/html'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">HTML</span> <span class="o">=</span> <span class="s1">'text/html'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1163,8 +1313,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">JSON</span>
|
||||
<h6 id="omniread.core.content.ContentType.JSON" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">JSON</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1172,9 +1322,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">JSON</span> <span class="o">=</span> <span class="s1">'application/json'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">JSON</span> <span class="o">=</span> <span class="s1">'application/json'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1187,8 +1337,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">PDF</span>
|
||||
<h6 id="omniread.core.content.ContentType.PDF" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">PDF</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1196,9 +1346,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">PDF</span> <span class="o">=</span> <span class="s1">'application/pdf'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">PDF</span> <span class="o">=</span> <span class="s1">'application/pdf'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1211,8 +1361,8 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<h4 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
|
||||
<span class="doc doc-object-name doc-attribute-name">XML</span>
|
||||
<h6 id="omniread.core.content.ContentType.XML" class="doc doc-heading">
|
||||
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">XML</span>
|
||||
|
||||
|
||||
<span class="doc doc-labels">
|
||||
@@ -1220,9 +1370,9 @@ parser or downstream consumer.</p>
|
||||
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
|
||||
</span>
|
||||
|
||||
</h4>
|
||||
<div class="doc-signature highlight"><pre><span></span><code><span class="n">XML</span> <span class="o">=</span> <span class="s1">'application/xml'</span>
|
||||
</code></pre></div>
|
||||
</h6>
|
||||
<div class="language-python doc-signature highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1">1</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1"></a><span class="n">XML</span> <span class="o">=</span> <span class="s1">'application/xml'</span>
|
||||
</span></code></pre></div></td></tr></table></div>
|
||||
|
||||
<div class="doc doc-contents ">
|
||||
|
||||
@@ -1266,6 +1416,8 @@ parser or downstream consumer.</p>
|
||||
</div>
|
||||
|
||||
|
||||
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
|
||||
|
||||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||
</div>
|
||||
|
||||
@@ -1303,7 +1455,7 @@ parser or downstream consumer.</p>
|
||||
|
||||
|
||||
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.expand", "navigation.top", "navigation.instant", "content.code.copy", "content.code.annotate"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.expand", "navigation.top", "navigation.instant", "navigation.tracking", "navigation.indexes", "content.code.copy", "content.code.annotate", "content.tabs.link", "content.action.edit", "search.highlight", "search.share", "search.suggest"], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||||
|
||||
|
||||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||||
|
||||
Reference in New Issue
Block a user