# pkgs/development/python-modules/trafilatura/default.nix
#
# Packages trafilatura from its PyPI source distribution. The GUI entry
# point is stripped from setup.py, and the tests that need network access
# are skipped so the check phase can run inside the build sandbox.
{ lib
, buildPythonPackage
, fetchPypi
, pytestCheckHook
, pythonOlder
, certifi
, charset-normalizer
, courlan
, htmldate
, justext
, lxml
, urllib3
}:

buildPythonPackage rec {
  pname = "trafilatura";
  version = "1.6.3";
  format = "setuptools";

  # not built for Python interpreters older than 3.6
  disabled = pythonOlder "3.6";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-Zx3W4AAOEBxLzo1w9ECLy3n8vyJ17iVZHv4z4sihYA0=";
  };

  # runtime dependencies, propagated to anything that depends on this module
  propagatedBuildInputs = [
    certifi
    charset-normalizer
    courlan
    htmldate
    justext
    lxml
    urllib3
  ];

  nativeCheckInputs = [
    pytestCheckHook
  ];

  # disable tests that require an internet connection
  disabledTests = [
    "test_download"
    "test_fetch"
    "test_redirection"
    "test_meta_redirections"
    "test_crawl_page"
    "test_whole"
    "test_probing"
    "test_cli_pipeline"
  ];

  # patch out gui cli because it is not supported in this packaging
  # nixify path to the trafilatura binary in the test suite
  postPatch = ''
    substituteInPlace setup.py --replace '"trafilatura_gui=trafilatura.gui:main",' ""
    substituteInPlace tests/cli_tests.py --replace "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
  '';

  pythonImportsCheck = [
    "trafilatura"
  ];

  meta = with lib; {
    description = "Python package and command-line tool designed to gather text on the Web";
    homepage = "https://trafilatura.readthedocs.io";
    changelog = "https://github.com/adbar/trafilatura/blob/v${version}/HISTORY.md";
    license = licenses.gpl3Plus;
    maintainers = with maintainers; [ jokatzke ];
    # the executable installed at $out/bin/trafilatura; lets `nix run` and
    # lib.getExe resolve the binary without guessing from pname
    mainProgram = "trafilatura";
  };
}
# pkgs/top-level/python-packages.nix — hunk @@ -14493,6 +14493,8 @@ (+2 −0)
# Excerpt from inside the `self: super: with self; { ... }` package set;
# the surrounding attribute set is not shown here.
# NOTE(review): the `trafilatura` binding appears to be the added entry,
# slotted between its alphabetical neighbours — confirm against the full diff.

  trackpy = callPackage ../development/python-modules/trackpy { };

  trafilatura = callPackage ../development/python-modules/trafilatura { };

  trailrunner = callPackage ../development/python-modules/trailrunner { };

  trainer = callPackage ../development/python-modules/trainer { };