{ "module": "omniread.core.content", "content": { "path": "omniread.core.content", "docstring": "Canonical content models for OmniRead.\n\nThis module defines the **format-agnostic content representation** used across\nall parsers and scrapers in OmniRead.\n\nThe models defined here represent *what* was extracted, not *how* it was\nretrieved or parsed. Format-specific behavior and metadata must not alter\nthe semantic meaning of these models.", "objects": { "Enum": { "name": "Enum", "kind": "alias", "path": "omniread.core.content.Enum", "signature": "", "docstring": null }, "dataclass": { "name": "dataclass", "kind": "alias", "path": "omniread.core.content.dataclass", "signature": "", "docstring": null }, "Any": { "name": "Any", "kind": "alias", "path": "omniread.core.content.Any", "signature": "", "docstring": null }, "Mapping": { "name": "Mapping", "kind": "alias", "path": "omniread.core.content.Mapping", "signature": "", "docstring": null }, "Optional": { "name": "Optional", "kind": "alias", "path": "omniread.core.content.Optional", "signature": "", "docstring": null }, "ContentType": { "name": "ContentType", "kind": "class", "path": "omniread.core.content.ContentType", "signature": "", "docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.", "members": { "HTML": { "name": "HTML", "kind": "attribute", "path": "omniread.core.content.ContentType.HTML", "signature": null, "docstring": "HTML document content." }, "PDF": { "name": "PDF", "kind": "attribute", "path": "omniread.core.content.ContentType.PDF", "signature": null, "docstring": "PDF document content." }, "JSON": { "name": "JSON", "kind": "attribute", "path": "omniread.core.content.ContentType.JSON", "signature": null, "docstring": "JSON document content." }, "XML": { "name": "XML", "kind": "attribute", "path": "omniread.core.content.ContentType.XML", "signature": null, "docstring": "XML document content." } } }, "Content": { "name": "Content", "kind": "class", "path": "omniread.core.content.Content", "signature": "", "docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).", "members": { "raw": { "name": "raw", "kind": "attribute", "path": "omniread.core.content.Content.raw", "signature": null, "docstring": null }, "source": { "name": "source", "kind": "attribute", "path": "omniread.core.content.Content.source", "signature": null, "docstring": null }, "content_type": { "name": "content_type", "kind": "attribute", "path": "omniread.core.content.Content.content_type", "signature": null, "docstring": null }, "metadata": { "name": "metadata", "kind": "attribute", "path": "omniread.core.content.Content.metadata", "signature": null, "docstring": null } } } } } }