Files
omniread/mcp_docs/modules/omniread.core.parser.json
Vishesh 'ironeagle' Bangotra 67a3074ab4 using doc-forge (#1)
Reviewed-on: #1
Co-authored-by: Vishesh 'ironeagle' Bangotra <aetoskia@gmail.com>
Co-committed-by: Vishesh 'ironeagle' Bangotra <aetoskia@gmail.com>
2026-01-22 11:27:56 +00:00

162 lines
8.1 KiB
JSON

{
"module": "omniread.core.parser",
"content": {
"path": "omniread.core.parser",
"docstring": "Abstract parsing contracts for OmniRead.\n\nThis module defines the **format-agnostic parser interface** used to transform\nraw content into structured, typed representations.\n\nParsers are responsible for:\n- Interpreting a single `Content` instance\n- Validating compatibility with the content type\n- Producing a structured output suitable for downstream consumers\n\nParsers are not responsible for:\n- Fetching or acquiring content\n- Performing retries or error recovery\n- Managing multiple content sources",
"objects": {
"ABC": {
"name": "ABC",
"kind": "alias",
"path": "omniread.core.parser.ABC",
"signature": "<bound method Alias.signature of Alias('ABC', 'abc.ABC')>",
"docstring": null
},
"abstractmethod": {
"name": "abstractmethod",
"kind": "alias",
"path": "omniread.core.parser.abstractmethod",
"signature": "<bound method Alias.signature of Alias('abstractmethod', 'abc.abstractmethod')>",
"docstring": null
},
"Generic": {
"name": "Generic",
"kind": "alias",
"path": "omniread.core.parser.Generic",
"signature": "<bound method Alias.signature of Alias('Generic', 'typing.Generic')>",
"docstring": null
},
"TypeVar": {
"name": "TypeVar",
"kind": "alias",
"path": "omniread.core.parser.TypeVar",
"signature": "<bound method Alias.signature of Alias('TypeVar', 'typing.TypeVar')>",
"docstring": null
},
"Set": {
"name": "Set",
"kind": "alias",
"path": "omniread.core.parser.Set",
"signature": "<bound method Alias.signature of Alias('Set', 'typing.Set')>",
"docstring": null
},
"Content": {
"name": "Content",
"kind": "class",
"path": "omniread.core.parser.Content",
"signature": "<bound method Alias.signature of Alias('Content', 'omniread.core.content.Content')>",
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
"members": {
"raw": {
"name": "raw",
"kind": "attribute",
"path": "omniread.core.parser.Content.raw",
"signature": "<bound method Alias.signature of Alias('raw', 'omniread.core.content.Content.raw')>",
"docstring": null
},
"source": {
"name": "source",
"kind": "attribute",
"path": "omniread.core.parser.Content.source",
"signature": "<bound method Alias.signature of Alias('source', 'omniread.core.content.Content.source')>",
"docstring": null
},
"content_type": {
"name": "content_type",
"kind": "attribute",
"path": "omniread.core.parser.Content.content_type",
"signature": "<bound method Alias.signature of Alias('content_type', 'omniread.core.content.Content.content_type')>",
"docstring": null
},
"metadata": {
"name": "metadata",
"kind": "attribute",
"path": "omniread.core.parser.Content.metadata",
"signature": "<bound method Alias.signature of Alias('metadata', 'omniread.core.content.Content.metadata')>",
"docstring": null
}
}
},
"ContentType": {
"name": "ContentType",
"kind": "class",
"path": "omniread.core.parser.ContentType",
"signature": "<bound method Alias.signature of Alias('ContentType', 'omniread.core.content.ContentType')>",
"docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
"members": {
"HTML": {
"name": "HTML",
"kind": "attribute",
"path": "omniread.core.parser.ContentType.HTML",
"signature": "<bound method Alias.signature of Alias('HTML', 'omniread.core.content.ContentType.HTML')>",
"docstring": "HTML document content."
},
"PDF": {
"name": "PDF",
"kind": "attribute",
"path": "omniread.core.parser.ContentType.PDF",
"signature": "<bound method Alias.signature of Alias('PDF', 'omniread.core.content.ContentType.PDF')>",
"docstring": "PDF document content."
},
"JSON": {
"name": "JSON",
"kind": "attribute",
"path": "omniread.core.parser.ContentType.JSON",
"signature": "<bound method Alias.signature of Alias('JSON', 'omniread.core.content.ContentType.JSON')>",
"docstring": "JSON document content."
},
"XML": {
"name": "XML",
"kind": "attribute",
"path": "omniread.core.parser.ContentType.XML",
"signature": "<bound method Alias.signature of Alias('XML', 'omniread.core.content.ContentType.XML')>",
"docstring": "XML document content."
}
}
},
"T": {
"name": "T",
"kind": "attribute",
"path": "omniread.core.parser.T",
"signature": null,
"docstring": null
},
"BaseParser": {
"name": "BaseParser",
"kind": "class",
"path": "omniread.core.parser.BaseParser",
"signature": "<bound method Class.signature of Class('BaseParser', 26, 98)>",
"docstring": "Base interface for all parsers.\n\nA parser is a self-contained object that owns the Content\nit is responsible for interpreting.\n\nImplementations must:\n- Declare supported content types via `supported_types`\n- Raise parsing-specific exceptions from `parse()`\n- Remain deterministic for a given input\n\nConsumers may rely on:\n- Early validation of content compatibility\n- Type-stable return values from `parse()`",
"members": {
"supported_types": {
"name": "supported_types",
"kind": "attribute",
"path": "omniread.core.parser.BaseParser.supported_types",
"signature": null,
"docstring": "Set of content types supported by this parser.\n\nAn empty set indicates that the parser is content-type agnostic."
},
"content": {
"name": "content",
"kind": "attribute",
"path": "omniread.core.parser.BaseParser.content",
"signature": null,
"docstring": null
},
"parse": {
"name": "parse",
"kind": "function",
"path": "omniread.core.parser.BaseParser.parse",
"signature": "<bound method Function.signature of Function('parse', 68, 82)>",
"docstring": "Parse the owned content into structured output.\n\nImplementations must fully consume the provided content and\nreturn a deterministic, structured output.\n\nReturns:\n Parsed, structured representation.\n\nRaises:\n Exception: Parsing-specific errors as defined by the implementation."
},
"supports": {
"name": "supports",
"kind": "function",
"path": "omniread.core.parser.BaseParser.supports",
"signature": "<bound method Function.signature of Function('supports', 84, 98)>",
"docstring": "Check whether this parser supports the content's type.\n\nReturns:\n True if the content type is supported; False otherwise."
}
}
}
}
}
}