This commit is contained in:
2026-03-08 00:41:28 +05:30
parent 5842e6a227
commit 0fbf0ca0f0
14 changed files with 430 additions and 398 deletions

View File

@@ -2,42 +2,42 @@
"module": "omniread.core",
"content": {
"path": "omniread.core",
"docstring": "Core domain contracts for OmniRead.\n\nThis package defines the **format-agnostic domain layer** of OmniRead.\nIt exposes canonical content models and abstract interfaces that are\nimplemented by format-specific modules (HTML, PDF, etc.).\n\nPublic exports from this package are considered **stable contracts** and\nare safe for downstream consumers to depend on.\n\nSubmodules:\n- content: Canonical content models and enums\n- parser: Abstract parsing contracts\n- scraper: Abstract scraping contracts\n\nFormat-specific behavior must not be introduced at this layer.",
"docstring": "Core domain contracts for OmniRead.\n\n---\n\n## Summary\n\nThis package defines the **format-agnostic domain layer** of OmniRead.\nIt exposes canonical content models and abstract interfaces that are\nimplemented by format-specific modules (HTML, PDF, etc.).\n\nPublic exports from this package are considered **stable contracts** and\nare safe for downstream consumers to depend on.\n\nSubmodules:\n- content: Canonical content models and enums\n- parser: Abstract parsing contracts\n- scraper: Abstract scraping contracts\n\nFormat-specific behavior must not be introduced at this layer.\n\n---\n\n## Public API\n\n Content\n ContentType\n\n---",
"objects": {
"Content": {
"name": "Content",
"kind": "class",
"path": "omniread.core.Content",
"signature": "<bound method Alias.signature of Alias('Content', 'omniread.core.content.Content')>",
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
"docstring": "Normalized representation of extracted content.\n\nNotes:\n **Responsibilities:**\n\n - A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type\n - This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers",
"members": {
"raw": {
"name": "raw",
"kind": "attribute",
"path": "omniread.core.Content.raw",
"signature": "<bound method Alias.signature of Alias('raw', 'omniread.core.content.Content.raw')>",
"docstring": null
"docstring": "Raw content bytes as retrieved from the source."
},
"source": {
"name": "source",
"kind": "attribute",
"path": "omniread.core.Content.source",
"signature": "<bound method Alias.signature of Alias('source', 'omniread.core.content.Content.source')>",
"docstring": null
"docstring": "Identifier of the content origin (URL, file path, or logical name)."
},
"content_type": {
"name": "content_type",
"kind": "attribute",
"path": "omniread.core.Content.content_type",
"signature": "<bound method Alias.signature of Alias('content_type', 'omniread.core.content.Content.content_type')>",
"docstring": null
"docstring": "Optional MIME type of the content, if known."
},
"metadata": {
"name": "metadata",
"kind": "attribute",
"path": "omniread.core.Content.metadata",
"signature": "<bound method Alias.signature of Alias('metadata', 'omniread.core.content.Content.metadata')>",
"docstring": null
"docstring": "Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes)."
}
}
},
@@ -46,7 +46,7 @@
"kind": "class",
"path": "omniread.core.ContentType",
"signature": "<bound method Alias.signature of Alias('ContentType', 'omniread.core.content.ContentType')>",
"docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
"docstring": "Supported MIME types for extracted content.\n\nNotes:\n **Guarantees:**\n\n - This enum represents the declared or inferred media type of the content source\n - It is primarily used for routing content to the appropriate parser or downstream consumer",
"members": {
"HTML": {
"name": "HTML",
@@ -83,14 +83,14 @@
"kind": "class",
"path": "omniread.core.BaseParser",
"signature": "<bound method Alias.signature of Alias('BaseParser', 'omniread.core.parser.BaseParser')>",
"docstring": "Base interface for all parsers.\n\nA parser is a self-contained object that owns the Content\nit is responsible for interpreting.\n\nImplementations must:\n- Declare supported content types via `supported_types`\n- Raise parsing-specific exceptions from `parse()`\n- Remain deterministic for a given input\n\nConsumers may rely on:\n- Early validation of content compatibility\n- Type-stable return values from `parse()`",
"docstring": "Base interface for all parsers.\n\nNotes:\n **Guarantees:**\n\n - A parser is a self-contained object that owns the Content it is responsible for interpreting\n - Consumers may rely on early validation of content compatibility and type-stable return values from `parse()`\n\n **Responsibilities:**\n\n - Implementations must declare supported content types via `supported_types`\n - Implementations must raise parsing-specific exceptions from `parse()`\n - Implementations must remain deterministic for a given input",
"members": {
"supported_types": {
"name": "supported_types",
"kind": "attribute",
"path": "omniread.core.BaseParser.supported_types",
"signature": "<bound method Alias.signature of Alias('supported_types', 'omniread.core.parser.BaseParser.supported_types')>",
"docstring": "Set of content types supported by this parser.\n\nAn empty set indicates that the parser is content-type agnostic."
"docstring": "Set of content types supported by this parser. An empty set indicates that the parser is content-type agnostic."
},
"content": {
"name": "content",
@@ -104,14 +104,14 @@
"kind": "function",
"path": "omniread.core.BaseParser.parse",
"signature": "<bound method Alias.signature of Alias('parse', 'omniread.core.parser.BaseParser.parse')>",
"docstring": "Parse the owned content into structured output.\n\nImplementations must fully consume the provided content and\nreturn a deterministic, structured output.\n\nReturns:\n Parsed, structured representation.\n\nRaises:\n Exception: Parsing-specific errors as defined by the implementation."
"docstring": "Parse the owned content into structured output.\n\nReturns:\n T:\n Parsed, structured representation.\n\nRaises:\n Exception:\n Parsing-specific errors as defined by the implementation.\n\nNotes:\n **Responsibilities:**\n\n - Implementations must fully consume the provided content and return a deterministic, structured output"
},
"supports": {
"name": "supports",
"kind": "function",
"path": "omniread.core.BaseParser.supports",
"signature": "<bound method Alias.signature of Alias('supports', 'omniread.core.parser.BaseParser.supports')>",
"docstring": "Check whether this parser supports the content's type.\n\nReturns:\n True if the content type is supported; False otherwise."
"docstring": "Check whether this parser supports the content's type.\n\nReturns:\n bool:\n True if the content type is supported; False otherwise."
}
}
},
@@ -120,14 +120,14 @@
"kind": "class",
"path": "omniread.core.BaseScraper",
"signature": "<bound method Alias.signature of Alias('BaseScraper', 'omniread.core.scraper.BaseScraper')>",
"docstring": "Base interface for all scrapers.\n\nA scraper is responsible ONLY for fetching raw content\n(bytes) from a source. It must not interpret or parse it.\n\nA scraper is a **stateless acquisition component** that retrieves raw\ncontent from a source and returns it as a `Content` object.\n\nScrapers define *how content is obtained*, not *what the content means*.\n\nImplementations may vary in:\n- Transport mechanism (HTTP, filesystem, cloud storage)\n- Authentication strategy\n- Retry and backoff behavior\n\nImplementations must not:\n- Parse content\n- Modify content semantics\n- Couple scraping logic to a specific parser",
"docstring": "Base interface for all scrapers.\n\nNotes:\n **Responsibilities:**\n\n - A scraper is responsible ONLY for fetching raw content (bytes) from a source. It must not interpret or parse it\n - A scraper is a stateless acquisition component that retrieves raw content from a source and returns it as a `Content` object\n - Scrapers define how content is obtained, not what the content means\n - Implementations may vary in transport mechanism, authentication strategy, retry and backoff behavior\n\n **Constraints:**\n\n - Implementations must not parse content, modify content semantics, or couple scraping logic to a specific parser",
"members": {
"fetch": {
"name": "fetch",
"kind": "function",
"path": "omniread.core.BaseScraper.fetch",
"signature": "<bound method Alias.signature of Alias('fetch', 'omniread.core.scraper.BaseScraper.fetch')>",
"docstring": "Fetch raw content from the given source.\n\nImplementations must retrieve the content referenced by `source`\nand return it as raw bytes wrapped in a `Content` object.\n\nArgs:\n source: Location identifier (URL, file path, S3 URI, etc.)\n metadata: Optional hints for the scraper (headers, auth, etc.)\n\nReturns:\n Content object containing raw bytes and metadata.\n - Raw content bytes\n - Source identifier\n - Optional metadata\n\nRaises:\n Exception: Retrieval-specific errors as defined by the implementation."
"docstring": "Fetch raw content from the given source.\n\nArgs:\n source (str):\n Location identifier (URL, file path, S3 URI, etc.)\n metadata (Optional[Mapping[str, Any]], optional):\n Optional hints for the scraper (headers, auth, etc.)\n\nReturns:\n Content:\n Content object containing raw bytes and metadata.\n\nRaises:\n Exception:\n Retrieval-specific errors as defined by the implementation.\n\nNotes:\n **Responsibilities:**\n\n - Implementations must retrieve the content referenced by `source` and return it as raw bytes wrapped in a `Content` object"
}
}
},
@@ -136,7 +136,7 @@
"kind": "module",
"path": "omniread.core.content",
"signature": null,
"docstring": "Canonical content models for OmniRead.\n\nThis module defines the **format-agnostic content representation** used across\nall parsers and scrapers in OmniRead.\n\nThe models defined here represent *what* was extracted, not *how* it was\nretrieved or parsed. Format-specific behavior and metadata must not alter\nthe semantic meaning of these models.",
"docstring": "Canonical content models for OmniRead.\n\n---\n\n## Summary\n\nThis module defines the **format-agnostic content representation** used across\nall parsers and scrapers in OmniRead.\n\nThe models defined here represent *what* was extracted, not *how* it was\nretrieved or parsed. Format-specific behavior and metadata must not alter\nthe semantic meaning of these models.",
"members": {
"Enum": {
"name": "Enum",
@@ -177,8 +177,8 @@
"name": "ContentType",
"kind": "class",
"path": "omniread.core.content.ContentType",
"signature": "<bound method Class.signature of Class('ContentType', 17, 36)>",
"docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
"signature": "<bound method Class.signature of Class('ContentType', 21, 42)>",
"docstring": "Supported MIME types for extracted content.\n\nNotes:\n **Guarantees:**\n\n - This enum represents the declared or inferred media type of the content source\n - It is primarily used for routing content to the appropriate parser or downstream consumer",
"members": {
"HTML": {
"name": "HTML",
@@ -214,36 +214,36 @@
"name": "Content",
"kind": "class",
"path": "omniread.core.content.Content",
"signature": "<bound method Class.signature of Class('Content', 39, 63)>",
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
"signature": "<bound method Class.signature of Class('Content', 45, 75)>",
"docstring": "Normalized representation of extracted content.\n\nNotes:\n **Responsibilities:**\n\n - A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type\n - This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers",
"members": {
"raw": {
"name": "raw",
"kind": "attribute",
"path": "omniread.core.content.Content.raw",
"signature": null,
"docstring": null
"docstring": "Raw content bytes as retrieved from the source."
},
"source": {
"name": "source",
"kind": "attribute",
"path": "omniread.core.content.Content.source",
"signature": null,
"docstring": null
"docstring": "Identifier of the content origin (URL, file path, or logical name)."
},
"content_type": {
"name": "content_type",
"kind": "attribute",
"path": "omniread.core.content.Content.content_type",
"signature": null,
"docstring": null
"docstring": "Optional MIME type of the content, if known."
},
"metadata": {
"name": "metadata",
"kind": "attribute",
"path": "omniread.core.content.Content.metadata",
"signature": null,
"docstring": null
"docstring": "Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes)."
}
}
}
@@ -254,7 +254,7 @@
"kind": "module",
"path": "omniread.core.parser",
"signature": null,
"docstring": "Abstract parsing contracts for OmniRead.\n\nThis module defines the **format-agnostic parser interface** used to transform\nraw content into structured, typed representations.\n\nParsers are responsible for:\n- Interpreting a single `Content` instance\n- Validating compatibility with the content type\n- Producing a structured output suitable for downstream consumers\n\nParsers are not responsible for:\n- Fetching or acquiring content\n- Performing retries or error recovery\n- Managing multiple content sources",
"docstring": "Abstract parsing contracts for OmniRead.\n\n---\n\n## Summary\n\nThis module defines the **format-agnostic parser interface** used to transform\nraw content into structured, typed representations.\n\nParsers are responsible for:\n- Interpreting a single `Content` instance\n- Validating compatibility with the content type\n- Producing a structured output suitable for downstream consumers\n\nParsers are not responsible for:\n- Fetching or acquiring content\n- Performing retries or error recovery\n- Managing multiple content sources",
"members": {
"ABC": {
"name": "ABC",
@@ -296,35 +296,35 @@
"kind": "class",
"path": "omniread.core.parser.Content",
"signature": "<bound method Alias.signature of Alias('Content', 'omniread.core.content.Content')>",
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
"docstring": "Normalized representation of extracted content.\n\nNotes:\n **Responsibilities:**\n\n - A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type\n - This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers",
"members": {
"raw": {
"name": "raw",
"kind": "attribute",
"path": "omniread.core.parser.Content.raw",
"signature": "<bound method Alias.signature of Alias('raw', 'omniread.core.content.Content.raw')>",
"docstring": null
"docstring": "Raw content bytes as retrieved from the source."
},
"source": {
"name": "source",
"kind": "attribute",
"path": "omniread.core.parser.Content.source",
"signature": "<bound method Alias.signature of Alias('source', 'omniread.core.content.Content.source')>",
"docstring": null
"docstring": "Identifier of the content origin (URL, file path, or logical name)."
},
"content_type": {
"name": "content_type",
"kind": "attribute",
"path": "omniread.core.parser.Content.content_type",
"signature": "<bound method Alias.signature of Alias('content_type', 'omniread.core.content.Content.content_type')>",
"docstring": null
"docstring": "Optional MIME type of the content, if known."
},
"metadata": {
"name": "metadata",
"kind": "attribute",
"path": "omniread.core.parser.Content.metadata",
"signature": "<bound method Alias.signature of Alias('metadata', 'omniread.core.content.Content.metadata')>",
"docstring": null
"docstring": "Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes)."
}
}
},
@@ -333,7 +333,7 @@
"kind": "class",
"path": "omniread.core.parser.ContentType",
"signature": "<bound method Alias.signature of Alias('ContentType', 'omniread.core.content.ContentType')>",
"docstring": "Supported MIME types for extracted content.\n\nThis enum represents the declared or inferred media type of the content\nsource. It is primarily used for routing content to the appropriate\nparser or downstream consumer.",
"docstring": "Supported MIME types for extracted content.\n\nNotes:\n **Guarantees:**\n\n - This enum represents the declared or inferred media type of the content source\n - It is primarily used for routing content to the appropriate parser or downstream consumer",
"members": {
"HTML": {
"name": "HTML",
@@ -376,15 +376,15 @@
"name": "BaseParser",
"kind": "class",
"path": "omniread.core.parser.BaseParser",
"signature": "<bound method Class.signature of Class('BaseParser', 26, 98)>",
"docstring": "Base interface for all parsers.\n\nA parser is a self-contained object that owns the Content\nit is responsible for interpreting.\n\nImplementations must:\n- Declare supported content types via `supported_types`\n- Raise parsing-specific exceptions from `parse()`\n- Remain deterministic for a given input\n\nConsumers may rely on:\n- Early validation of content compatibility\n- Type-stable return values from `parse()`",
"signature": "<bound method Class.signature of Class('BaseParser', 30, 108)>",
"docstring": "Base interface for all parsers.\n\nNotes:\n **Guarantees:**\n\n - A parser is a self-contained object that owns the Content it is responsible for interpreting\n - Consumers may rely on early validation of content compatibility and type-stable return values from `parse()`\n\n **Responsibilities:**\n\n - Implementations must declare supported content types via `supported_types`\n - Implementations must raise parsing-specific exceptions from `parse()`\n - Implementations must remain deterministic for a given input",
"members": {
"supported_types": {
"name": "supported_types",
"kind": "attribute",
"path": "omniread.core.parser.BaseParser.supported_types",
"signature": null,
"docstring": "Set of content types supported by this parser.\n\nAn empty set indicates that the parser is content-type agnostic."
"docstring": "Set of content types supported by this parser. An empty set indicates that the parser is content-type agnostic."
},
"content": {
"name": "content",
@@ -397,15 +397,15 @@
"name": "parse",
"kind": "function",
"path": "omniread.core.parser.BaseParser.parse",
"signature": "<bound method Function.signature of Function('parse', 68, 82)>",
"docstring": "Parse the owned content into structured output.\n\nImplementations must fully consume the provided content and\nreturn a deterministic, structured output.\n\nReturns:\n Parsed, structured representation.\n\nRaises:\n Exception: Parsing-specific errors as defined by the implementation."
"signature": "<bound method Function.signature of Function('parse', 73, 91)>",
"docstring": "Parse the owned content into structured output.\n\nReturns:\n T:\n Parsed, structured representation.\n\nRaises:\n Exception:\n Parsing-specific errors as defined by the implementation.\n\nNotes:\n **Responsibilities:**\n\n - Implementations must fully consume the provided content and return a deterministic, structured output"
},
"supports": {
"name": "supports",
"kind": "function",
"path": "omniread.core.parser.BaseParser.supports",
"signature": "<bound method Function.signature of Function('supports', 84, 98)>",
"docstring": "Check whether this parser supports the content's type.\n\nReturns:\n True if the content type is supported; False otherwise."
"signature": "<bound method Function.signature of Function('supports', 93, 108)>",
"docstring": "Check whether this parser supports the content's type.\n\nReturns:\n bool:\n True if the content type is supported; False otherwise."
}
}
}
@@ -416,7 +416,7 @@
"kind": "module",
"path": "omniread.core.scraper",
"signature": null,
"docstring": "Abstract scraping contracts for OmniRead.\n\nThis module defines the **format-agnostic scraper interface** responsible for\nacquiring raw content from external sources.\n\nScrapers are responsible for:\n- Locating and retrieving raw content bytes\n- Attaching minimal contextual metadata\n- Returning normalized `Content` objects\n\nScrapers are explicitly NOT responsible for:\n- Parsing or interpreting content\n- Inferring structure or semantics\n- Performing content-type specific processing\n\nAll interpretation must be delegated to parsers.",
"docstring": "Abstract scraping contracts for OmniRead.\n\n---\n\n## Summary\n\nThis module defines the **format-agnostic scraper interface** responsible for\nacquiring raw content from external sources.\n\nScrapers are responsible for:\n- Locating and retrieving raw content bytes\n- Attaching minimal contextual metadata\n- Returning normalized `Content` objects\n\nScrapers are explicitly NOT responsible for:\n- Parsing or interpreting content\n- Inferring structure or semantics\n- Performing content-type specific processing\n\nAll interpretation must be delegated to parsers.",
"members": {
"ABC": {
"name": "ABC",
@@ -458,35 +458,35 @@
"kind": "class",
"path": "omniread.core.scraper.Content",
"signature": "<bound method Alias.signature of Alias('Content', 'omniread.core.content.Content')>",
"docstring": "Normalized representation of extracted content.\n\nA `Content` instance represents a raw content payload along with minimal\ncontextual metadata describing its origin and type.\n\nThis class is the **primary exchange format** between:\n- Scrapers\n- Parsers\n- Downstream consumers\n\nAttributes:\n raw: Raw content bytes as retrieved from the source.\n source: Identifier of the content origin (URL, file path, or logical name).\n content_type: Optional MIME type of the content, if known.\n metadata: Optional, implementation-defined metadata associated with\n the content (e.g., headers, encoding hints, extraction notes).",
"docstring": "Normalized representation of extracted content.\n\nNotes:\n **Responsibilities:**\n\n - A `Content` instance represents a raw content payload along with minimal contextual metadata describing its origin and type\n - This class is the primary exchange format between Scrapers, Parsers, and Downstream consumers",
"members": {
"raw": {
"name": "raw",
"kind": "attribute",
"path": "omniread.core.scraper.Content.raw",
"signature": "<bound method Alias.signature of Alias('raw', 'omniread.core.content.Content.raw')>",
"docstring": null
"docstring": "Raw content bytes as retrieved from the source."
},
"source": {
"name": "source",
"kind": "attribute",
"path": "omniread.core.scraper.Content.source",
"signature": "<bound method Alias.signature of Alias('source', 'omniread.core.content.Content.source')>",
"docstring": null
"docstring": "Identifier of the content origin (URL, file path, or logical name)."
},
"content_type": {
"name": "content_type",
"kind": "attribute",
"path": "omniread.core.scraper.Content.content_type",
"signature": "<bound method Alias.signature of Alias('content_type', 'omniread.core.content.Content.content_type')>",
"docstring": null
"docstring": "Optional MIME type of the content, if known."
},
"metadata": {
"name": "metadata",
"kind": "attribute",
"path": "omniread.core.scraper.Content.metadata",
"signature": "<bound method Alias.signature of Alias('metadata', 'omniread.core.content.Content.metadata')>",
"docstring": null
"docstring": "Optional, implementation-defined metadata associated with the content (e.g., headers, encoding hints, extraction notes)."
}
}
},
@@ -494,15 +494,15 @@
"name": "BaseScraper",
"kind": "class",
"path": "omniread.core.scraper.BaseScraper",
"signature": "<bound method Class.signature of Class('BaseScraper', 26, 75)>",
"docstring": "Base interface for all scrapers.\n\nA scraper is responsible ONLY for fetching raw content\n(bytes) from a source. It must not interpret or parse it.\n\nA scraper is a **stateless acquisition component** that retrieves raw\ncontent from a source and returns it as a `Content` object.\n\nScrapers define *how content is obtained*, not *what the content means*.\n\nImplementations may vary in:\n- Transport mechanism (HTTP, filesystem, cloud storage)\n- Authentication strategy\n- Retry and backoff behavior\n\nImplementations must not:\n- Parse content\n- Modify content semantics\n- Couple scraping logic to a specific parser",
"signature": "<bound method Class.signature of Class('BaseScraper', 30, 76)>",
"docstring": "Base interface for all scrapers.\n\nNotes:\n **Responsibilities:**\n\n - A scraper is responsible ONLY for fetching raw content (bytes) from a source. It must not interpret or parse it\n - A scraper is a stateless acquisition component that retrieves raw content from a source and returns it as a `Content` object\n - Scrapers define how content is obtained, not what the content means\n - Implementations may vary in transport mechanism, authentication strategy, retry and backoff behavior\n\n **Constraints:**\n\n - Implementations must not parse content, modify content semantics, or couple scraping logic to a specific parser",
"members": {
"fetch": {
"name": "fetch",
"kind": "function",
"path": "omniread.core.scraper.BaseScraper.fetch",
"signature": "<bound method Function.signature of Function('fetch', 49, 75)>",
"docstring": "Fetch raw content from the given source.\n\nImplementations must retrieve the content referenced by `source`\nand return it as raw bytes wrapped in a `Content` object.\n\nArgs:\n source: Location identifier (URL, file path, S3 URI, etc.)\n metadata: Optional hints for the scraper (headers, auth, etc.)\n\nReturns:\n Content object containing raw bytes and metadata.\n - Raw content bytes\n - Source identifier\n - Optional metadata\n\nRaises:\n Exception: Retrieval-specific errors as defined by the implementation."
"signature": "<bound method Function.signature of Function('fetch', 47, 76)>",
"docstring": "Fetch raw content from the given source.\n\nArgs:\n source (str):\n Location identifier (URL, file path, S3 URI, etc.)\n metadata (Optional[Mapping[str, Any]], optional):\n Optional hints for the scraper (headers, auth, etc.)\n\nReturns:\n Content:\n Content object containing raw bytes and metadata.\n\nRaises:\n Exception:\n Retrieval-specific errors as defined by the implementation.\n\nNotes:\n **Responsibilities:**\n\n - Implementations must retrieve the content referenced by `source` and return it as raw bytes wrapped in a `Content` object"
}
}
}