omniread/mcp_docs/modules/omniread.pdf.parser.json

{
  "module": "omniread.pdf.parser",
  "content": {
    "path": "omniread.pdf.parser",
    "docstring": "PDF parser base implementations for OmniRead.\n\nThis module defines the **PDF-specific parser contract**, extending the\nformat-agnostic `BaseParser` with constraints appropriate for PDF content.\n\nPDF parsers are responsible for interpreting binary PDF data and producing\nstructured representations suitable for downstream consumption.",
    "objects": {
      "Generic": {
        "name": "Generic",
        "kind": "alias",
        "path": "omniread.pdf.parser.Generic",
        "signature": "<bound method Alias.signature of Alias('Generic', 'typing.Generic')>",
        "docstring": null
      },
      "TypeVar": {
        "name": "TypeVar",
        "kind": "alias",
        "path": "omniread.pdf.parser.TypeVar",
        "signature": "<bound method Alias.signature of Alias('TypeVar', 'typing.TypeVar')>",
        "docstring": null
      },
      "abstractmethod": {
        "name": "abstractmethod",
        "kind": "alias",
        "path": "omniread.pdf.parser.abstractmethod",
        "signature": "<bound method Alias.signature of Alias('abstractmethod', 'abc.abstractmethod')>",
        "docstring": null
      },
      "ContentType": {
        "name": "ContentType",
        "kind": "class",
        "path": "omniread.pdf.parser.ContentType",
        "signature": "<bound method Alias.signature of Alias('ContentType', 'omniread.core.content.ContentType')>",
        "docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
        "members": {
          "HTML": {
            "name": "HTML",
            "kind": "attribute",
            "path": "omniread.pdf.parser.ContentType.HTML",
            "signature": "<bound method Alias.signature of Alias('HTML', 'omniread.core.content.ContentType.HTML')>",
            "docstring": "HTML document content."
          },
          "PDF": {
            "name": "PDF",
            "kind": "attribute",
            "path": "omniread.pdf.parser.ContentType.PDF",
            "signature": "<bound method Alias.signature of Alias('PDF', 'omniread.core.content.ContentType.PDF')>",
            "docstring": "PDF document content."
          },
          "JSON": {
            "name": "JSON",
            "kind": "attribute",
            "path": "omniread.pdf.parser.ContentType.JSON",
            "signature": "<bound method Alias.signature of Alias('JSON', 'omniread.core.content.ContentType.JSON')>",
            "docstring": "JSON document content."
          },
          "XML": {
            "name": "XML",
            "kind": "attribute",
            "path": "omniread.pdf.parser.ContentType.XML",
            "signature": "<bound method Alias.signature of Alias('XML', 'omniread.core.content.ContentType.XML')>",
            "docstring": "XML document content."
          }
        }
      },
      "BaseParser": {
        "name": "BaseParser",
        "kind": "class",
        "path": "omniread.pdf.parser.BaseParser",
        "signature": "<bound method Alias.signature of Alias('BaseParser', 'omniread.core.parser.BaseParser')>",
        "docstring": "Base interface for all parsers.\n\nA parser is a self-contained object that owns the Content\nit is responsible for interpreting.\n\nImplementations must:\n- Declare supported content types via `supported_types`\n- Raise parsing-specific exceptions from `parse()`\n- Remain deterministic for a given input\n\nConsumers may rely on:\n- Early validation of content compatibility\n- Type-stable return values from `parse()`",
        "members": {
          "supported_types": {
            "name": "supported_types",
            "kind": "attribute",
            "path": "omniread.pdf.parser.BaseParser.supported_types",
            "signature": "<bound method Alias.signature of Alias('supported_types', 'omniread.core.parser.BaseParser.supported_types')>",
            "docstring": "Set of content types supported by this parser.\n\nAn empty set indicates that the parser is content-type agnostic."
          },
          "content": {
            "name": "content",
            "kind": "attribute",
            "path": "omniread.pdf.parser.BaseParser.content",
            "signature": "<bound method Alias.signature of Alias('content', 'omniread.core.parser.BaseParser.content')>",
            "docstring": null
          },
          "parse": {
            "name": "parse",
            "kind": "function",
            "path": "omniread.pdf.parser.BaseParser.parse",
            "signature": "<bound method Alias.signature of Alias('parse', 'omniread.core.parser.BaseParser.parse')>",
            "docstring": "Parse the owned content into structured output.\n\nImplementations must fully consume the provided content and\nreturn a deterministic, structured output.\n\nReturns:\n    Parsed, structured representation.\n\nRaises:\n    Exception: Parsing-specific errors as defined by the implementation."
          },
          "supports": {
            "name": "supports",
            "kind": "function",
            "path": "omniread.pdf.parser.BaseParser.supports",
            "signature": "<bound method Alias.signature of Alias('supports', 'omniread.core.parser.BaseParser.supports')>",
            "docstring": "Check whether this parser supports the content's type.\n\nReturns:\n    True if the content type is supported; False otherwise."
          }
        }
      },
      "T": {
        "name": "T",
        "kind": "attribute",
        "path": "omniread.pdf.parser.T",
        "signature": null,
        "docstring": null
      },
      "PDFParser": {
        "name": "PDFParser",
        "kind": "class",
        "path": "omniread.pdf.parser.PDFParser",
        "signature": "<bound method Class.signature of Class('PDFParser', 20, 49)>",
        "docstring": "Base PDF parser.\n\nThis class enforces PDF content-type compatibility and provides the\nextension point for implementing concrete PDF parsing strategies.\n\nConcrete implementations must:\n- Define the output type `T`\n- Implement the `parse()` method",
        "members": {
          "supported_types": {
            "name": "supported_types",
            "kind": "attribute",
            "path": "omniread.pdf.parser.PDFParser.supported_types",
            "signature": null,
            "docstring": "Set of content types supported by this parser (PDF only)."
          },
          "parse": {
            "name": "parse",
            "kind": "function",
            "path": "omniread.pdf.parser.PDFParser.parse",
            "signature": "<bound method Function.signature of Function('parse', 35, 49)>",
            "docstring": "Parse PDF content into a structured output.\n\nImplementations must fully interpret the PDF binary payload and\nreturn a deterministic, structured output.\n\nReturns:\n    Parsed representation of type `T`.\n\nRaises:\n    Exception: Parsing-specific errors as defined by the implementation."
          }
        }
      }
    }
  }
}