# omniread/omniread/pdf/client.py

from abc import ABC, abstractmethod
from pathlib import Path


class BasePDFClient(ABC):
    """
    Interface for objects that load the raw bytes of a PDF.

    Each concrete subclass targets one kind of backing store
    (filesystem, S3, FTP, etc) and has to implement ``fetch``.
    """

    @abstractmethod
    def fetch(self, source: str) -> bytes:
        """
        Return the raw bytes of the PDF identified by ``source``.

        The base implementation does nothing except raise
        ``NotImplementedError``; subclasses provide the retrieval logic.
        """
        raise NotImplementedError()


class FileSystemPDFClient(BasePDFClient):
    """
    PDF client that reads from the local filesystem.
    """

    def fetch(self, path: "str | Path") -> bytes:
        """
        Read and return the raw bytes of the file at ``path``.

        Args:
            path: Location of the file on the local filesystem. Accepts a
                ``pathlib.Path`` or a plain string, since the base class
                declares its source argument as ``str``.

        Returns:
            The complete contents of the file as bytes.

        Raises:
            FileNotFoundError: If nothing exists at ``path``.
            ValueError: If ``path`` exists but is not a file
                (for example, a directory).
        """
        # Normalise first: a plain string has no ``exists()``/``is_file()``,
        # so string sources would otherwise fail with AttributeError.
        file_path = Path(path)

        if not file_path.exists():
            raise FileNotFoundError(f"PDF not found: {file_path}")

        if not file_path.is_file():
            raise ValueError(f"Path is not a file: {file_path}")

        return file_path.read_bytes()