{ "module": "omniread.core.content", "content": { "path": "omniread.core.content", "docstring": "Canonical content models for OmniRead.\n\n---\n\n## Summary\n\nThis module defines the **format-agnostic content representation** used across\nall parsers and scrapers in OmniRead.\n\nThe models defined here represent *what* was extracted, not *how* it was\nretrieved or parsed. Format-specific behavior and metadata must not alter\nthe semantic meaning of these models.", "objects": { "Enum": { "name": "Enum", "kind": "alias", "path": "omniread.core.content.Enum", "signature": "", "docstring": null }, "dataclass": { "name": "dataclass", "kind": "alias", "path": "omniread.core.content.dataclass", "signature": "", "docstring": null }, "Any": { "name": "Any", "kind": "alias", "path": "omniread.core.content.Any", "signature": "", "docstring": null }, "Mapping": { "name": "Mapping", "kind": "alias", "path": "omniread.core.content.Mapping", "signature": "", "docstring": null }, "Optional": { "name": "Optional", "kind": "alias", "path": "omniread.core.content.Optional", "signature": "", "docstring": null }, "ContentType": { "name": "ContentType", "kind": "class", "path": "omniread.core.content.ContentType", "signature": "", "docstring": "Supported MIME types for extracted content.\n\nNotes:\n **Guarantees:**\n\n - This enum represents the declared or inferred media type of the content source\n - It is primarily used for routing content to the appropriate parser or downstream consumer", "members": { "HTML": { "name": "HTML", "kind": "attribute", "path": "omniread.core.content.ContentType.HTML", "signature": null, "docstring": "HTML document content." }, "PDF": { "name": "PDF", "kind": "attribute", "path": "omniread.core.content.ContentType.PDF", "signature": null, "docstring": "PDF document content." }, "JSON": { "name": "JSON", "kind": "attribute", "path": "omniread.core.content.ContentType.JSON", "signature": null, "docstring": "JSON document content." 
}, "XML": { "name": "XML", "kind": "attribute", "path": "omniread.core.content.ContentType.XML", "signature": null, "docstring": "XML document content." } } }, "Content": { "name": "Content", "kind": "class", "path": "omniread.core.content.Content", "signature": "", "docstring": "Normalized representation of extracted content.\n\nNotes:\n **Responsibilities:**\n\n - A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type\n - This class is the primary exchange format between scrapers, parsers, and downstream consumers", "members": { "raw": { "name": "raw", "kind": "attribute", "path": "omniread.core.content.Content.raw", "signature": null, "docstring": "Raw content bytes as retrieved from the source." }, "source": { "name": "source", "kind": "attribute", "path": "omniread.core.content.Content.source", "signature": null, "docstring": "Identifier of the content origin (URL, file path, or logical name)." }, "content_type": { "name": "content_type", "kind": "attribute", "path": "omniread.core.content.Content.content_type", "signature": null, "docstring": "Optional MIME type of the content, if known." }, "metadata": { "name": "metadata", "kind": "attribute", "path": "omniread.core.content.Content.metadata", "signature": null, "docstring": "Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes)." } } } } } }