# Nix expressions for marker-pdf and its missing dependencies.
#
# Argument:
#   pkgs — the package set these expressions are built against; it must
#          provide python3Packages, fetchurl, fetchPypi, autoPatchelfHook
#          and stdenv.
#
# Result: an attribute set exposing each packaged Python library
# (pypdfium2, pdftext, surya-ocr, marker-pdf) plus `markerEnv`.
{ pkgs }:

let
  python3Packages = pkgs.python3Packages;

  # pypdfium2 4.30.0 — pinned because pdftext and surya-ocr require v4.x API.
  # Installed from the manylinux wheel which bundles libpdfium.
  pypdfium2 = python3Packages.buildPythonPackage rec {
    pname = "pypdfium2";
    version = "4.30.0";
    format = "wheel";

    src = pkgs.fetchurl {
      url = "https://files.pythonhosted.org/packages/65/cd/3f1edf20a0ef4a212a5e20a5900e64942c5a374473671ac0780eaa08ea80/pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";
      hash = "sha256-8feNIYng3fmsK3qbm9Twxm9U0Tif9sF+n9ncA00G6z8=";
    };

    # The wheel carries a prebuilt native library, so it is patched to
    # resolve its shared-library dependencies rather than being compiled.
    nativeBuildInputs = [ pkgs.autoPatchelfHook ];
    # Runtime libraries from the stdenv compiler for the bundled binary
    # (presumably libstdc++ / libgcc_s — confirm if the toolchain changes).
    buildInputs = [ pkgs.stdenv.cc.cc.lib ];

    pythonImportsCheck = [ "pypdfium2" ];

    # The fetched wheel is tagged manylinux x86_64 only. Declaring that here
    # makes other systems fail at evaluation with a clear "unsupported
    # system" message instead of a confusing failure later on.
    meta = {
      platforms = [ "x86_64-linux" ];
    };
  };

  pdftext = python3Packages.buildPythonPackage rec {
    pname = "pdftext";
    version = "0.6.3";
    pyproject = true;

    src = pkgs.fetchPypi {
      inherit pname version;
      hash = "sha256-q1xd/g8ft43h24N8ytrB6kGwfOGJD+rZc8moTNr1Tew=";
    };

    build-system = [ python3Packages.poetry-core ];

    # Drop all upstream version bounds so the pinned pypdfium2 above and the
    # package set's own versions of the other dependencies are accepted.
    nativeBuildInputs = [ python3Packages.pythonRelaxDepsHook ];
    pythonRelaxDeps = true;

    dependencies = [
      pypdfium2
      python3Packages.pydantic
      python3Packages.pydantic-settings
      python3Packages.click
    ];

    # Tests require PDF fixtures not included in the sdist
    doCheck = false;

    pythonImportsCheck = [ "pdftext" ];
  };

  surya-ocr = python3Packages.buildPythonPackage rec {
    pname = "surya-ocr";
    version = "0.17.1";
    pyproject = true;

    # The sdist on PyPI is published under the underscore spelling.
    src = pkgs.fetchPypi {
      pname = "surya_ocr";
      inherit version;
      hash = "sha256-NJ142FTB7V+Bblg1Re1kUaoLxpkuKDqAUDR5mqzujCQ=";
    };

    build-system = [ python3Packages.poetry-core ];

    nativeBuildInputs = [ python3Packages.pythonRelaxDepsHook ];
    pythonRelaxDeps = true;
    # pre-commit is stripped from the declared requirements; it is not
    # supplied in `dependencies` below.
    pythonRemoveDeps = [ "pre-commit" ];

    dependencies = [
      python3Packages.transformers
      python3Packages.torch
      python3Packages.pydantic
      python3Packages.pydantic-settings
      python3Packages.python-dotenv
      python3Packages.pillow
      pypdfium2
      python3Packages.filetype
      python3Packages.click
      python3Packages.platformdirs
      python3Packages.opencv-python-headless
      python3Packages.einops
    ];

    # Tests require model weights and GPU
    doCheck = false;

    # The importable module is named `surya`, not `surya_ocr`.
    pythonImportsCheck = [ "surya" ];
  };

  marker-pdf = python3Packages.buildPythonPackage rec {
    pname = "marker-pdf";
    version = "1.10.2";
    pyproject = true;

    # The sdist on PyPI is published under the underscore spelling.
    src = pkgs.fetchPypi {
      pname = "marker_pdf";
      inherit version;
      hash = "sha256-zg/IOeEa11GaV20lTKnVGg+UVLnX2gIhH3IrFBMX+fE=";
    };

    build-system = [ python3Packages.poetry-core ];

    nativeBuildInputs = [ python3Packages.pythonRelaxDepsHook ];
    pythonRelaxDeps = true;
    # pre-commit is stripped from the declared requirements; it is not
    # supplied in `dependencies` below.
    pythonRemoveDeps = [ "pre-commit" ];

    dependencies = [
      python3Packages.pillow
      python3Packages.pydantic
      python3Packages.pydantic-settings
      python3Packages.transformers
      python3Packages.python-dotenv
      python3Packages.torch
      python3Packages.tqdm
      python3Packages.ftfy
      python3Packages.rapidfuzz
      surya-ocr
      python3Packages.regex
      pdftext
      python3Packages.markdownify
      python3Packages.click
      python3Packages.markdown2
      python3Packages.filetype
      python3Packages.google-genai
      python3Packages.anthropic
      python3Packages.scikit-learn
      python3Packages.openai
    ];

    # Tests require model weights
    doCheck = false;

    # The importable module is named `marker`.
    pythonImportsCheck = [ "marker" ];
  };
in
{
  inherit
    pypdfium2
    pdftext
    surya-ocr
    marker-pdf
    ;

  # Python environment with marker_single on PATH
  markerEnv = python3Packages.python.withPackages (_: [ marker-pdf ]);
}