Reviewed-on: #1 Co-authored-by: Vishesh 'ironeagle' Bangotra <aetoskia@gmail.com> Co-committed-by: Vishesh 'ironeagle' Bangotra <aetoskia@gmail.com>
118 lines
4.8 KiB
JSON
118 lines
4.8 KiB
JSON
{
|
|
"module": "omniread.core.content",
|
|
"content": {
|
|
"path": "omniread.core.content",
|
|
"docstring": "Canonical content models for OmniRead.\n\nThis module defines the **format-agnostic content representation** used across\nall parsers and scrapers in OmniRead.\n\nThe models defined here represent *what* was extracted, not *how* it was\nretrieved or parsed. Format-specific behavior and metadata must not alter\nthe semantic meaning of these models.",
|
|
"objects": {
|
|
"Enum": {
|
|
"name": "Enum",
|
|
"kind": "alias",
|
|
"path": "omniread.core.content.Enum",
|
|
"signature": "<bound method Alias.signature of Alias('Enum', 'enum.Enum')>",
|
|
"docstring": null
|
|
},
|
|
"dataclass": {
|
|
"name": "dataclass",
|
|
"kind": "alias",
|
|
"path": "omniread.core.content.dataclass",
|
|
"signature": "<bound method Alias.signature of Alias('dataclass', 'dataclasses.dataclass')>",
|
|
"docstring": null
|
|
},
|
|
"Any": {
|
|
"name": "Any",
|
|
"kind": "alias",
|
|
"path": "omniread.core.content.Any",
|
|
"signature": "<bound method Alias.signature of Alias('Any', 'typing.Any')>",
|
|
"docstring": null
|
|
},
|
|
"Mapping": {
|
|
"name": "Mapping",
|
|
"kind": "alias",
|
|
"path": "omniread.core.content.Mapping",
|
|
"signature": "<bound method Alias.signature of Alias('Mapping', 'typing.Mapping')>",
|
|
"docstring": null
|
|
},
|
|
"Optional": {
|
|
"name": "Optional",
|
|
"kind": "alias",
|
|
"path": "omniread.core.content.Optional",
|
|
"signature": "<bound method Alias.signature of Alias('Optional', 'typing.Optional')>",
|
|
"docstring": null
|
|
},
|
|
"ContentType": {
|
|
"name": "ContentType",
|
|
"kind": "class",
|
|
"path": "omniread.core.content.ContentType",
|
|
"signature": "<bound method Class.signature of Class('ContentType', 17, 36)>",
|
|
"docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
|
|
"members": {
|
|
"HTML": {
|
|
"name": "HTML",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.ContentType.HTML",
|
|
"signature": null,
|
|
"docstring": "HTML document content."
|
|
},
|
|
"PDF": {
|
|
"name": "PDF",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.ContentType.PDF",
|
|
"signature": null,
|
|
"docstring": "PDF document content."
|
|
},
|
|
"JSON": {
|
|
"name": "JSON",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.ContentType.JSON",
|
|
"signature": null,
|
|
"docstring": "JSON document content."
|
|
},
|
|
"XML": {
|
|
"name": "XML",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.ContentType.XML",
|
|
"signature": null,
|
|
"docstring": "XML document content."
|
|
}
|
|
}
|
|
},
|
|
"Content": {
|
|
"name": "Content",
|
|
"kind": "class",
|
|
"path": "omniread.core.content.Content",
|
|
"signature": "<bound method Class.signature of Class('Content', 39, 63)>",
|
|
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
|
|
"members": {
|
|
"raw": {
|
|
"name": "raw",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.Content.raw",
|
|
"signature": null,
|
|
"docstring": null
|
|
},
|
|
"source": {
|
|
"name": "source",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.Content.source",
|
|
"signature": null,
|
|
"docstring": null
|
|
},
|
|
"content_type": {
|
|
"name": "content_type",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.Content.content_type",
|
|
"signature": null,
|
|
"docstring": null
|
|
},
|
|
"metadata": {
|
|
"name": "metadata",
|
|
"kind": "attribute",
|
|
"path": "omniread.core.content.Content.metadata",
|
|
"signature": null,
|
|
"docstring": null
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} |