Unverified Commit fca21eb0 authored by dotlambda's avatar dotlambda Committed by GitHub
Browse files

python3Packages.wiktextract: init at 1.99.7-unstable-2026-03-26 (#441239)

parents d9e02d92 158756b2
Loading
Loading
Loading
Loading
+37 −0
Original line number Diff line number Diff line
# Packages the `mediawiki-langcodes` Python library, taking its source
# from PyPI and running the test suite through pytest.
{
  lib,
  buildPythonPackage,
  fetchPypi,
  pytestCheckHook,
  setuptools,
}:

buildPythonPackage (finalAttrs: {
  pname = "mediawiki-langcodes";
  version = "0.2.18";
  pyproject = true;

  build-system = [ setuptools ];

  # The release is pulled from PyPI rather than from the upstream Git
  # repository: building the language database from the repository
  # requires Internet access and running Python scripts.
  src = fetchPypi {
    # The PyPI project name is spelled with an underscore, unlike `pname`.
    pname = "mediawiki_langcodes";
    version = finalAttrs.version;
    hash = "sha256-9wHlISFD2Pc4qA+kAGR2yRXRby6NGkQRTOoamaoFCxU=";
  };

  nativeCheckInputs = [ pytestCheckHook ];

  # Import check for the top-level module.
  pythonImportsCheck = [ "mediawiki_langcodes" ];

  meta = {
    description = "Convert MediaWiki language names and language codes";
    homepage = "https://github.com/xxyzz/mediawiki_langcodes";
    changelog = "https://github.com/xxyzz/mediawiki_langcodes/releases/tag/v${finalAttrs.version}";
    license = lib.licenses.gpl3Plus;
    maintainers = [ lib.maintainers.theobori ];
  };
})
+68 −0
Original line number Diff line number Diff line
# Packages `wikitextprocessor` from a pinned commit of the upstream GitHub
# repository (submodules included), with a few tests disabled.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pytestCheckHook,
  unstableGitUpdater,

  # build system
  setuptools,

  # runtime dependencies
  dateparser,
  lupa,
  lxml,
  mediawiki-langcodes,
  psutil,
  requests,
}:

buildPythonPackage {
  pname = "wikitextprocessor";
  version = "0.4.96-unstable-2026-03-06";
  pyproject = true;

  # Pinned to an exact upstream commit; the Git submodules are fetched too.
  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wikitextprocessor";
    rev = "9d9a410c45c06d30239bcc0d8c1a57718a3f7a2c";
    fetchSubmodules = true;
    hash = "sha256-qhl9yRF2MUQvKXgcuxu20h6cEQofN1xMMb4JJZcFHS0=";
  };

  build-system = [ setuptools ];

  dependencies = [
    dateparser
    lupa
    lxml
    mediawiki-langcodes
    psutil
    requests
  ];

  # Relax the version constraint declared upstream for dateparser.
  pythonRelaxDeps = [ "dateparser" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # Needs Internet access
    "test_process_dump"
    # These try to write to a read-only database
    "test_fetchlanguage"
    "test_language_parser_function"
  ];

  pythonImportsCheck = [ "wikitextprocessor" ];

  passthru = {
    updateScript = unstableGitUpdater { };
  };

  meta = {
    description = "Parser and expander for Wikipedia, Wiktionary etc. dump files, with Lua execution support";
    homepage = "https://github.com/tatuylonen/wikitextprocessor";
    license = [
      lib.licenses.mit
      # Covers certain test files that are under the Wiktionary licence
      lib.licenses.cc-by-sa-40
      # Covers certain files in lua/mediawiki-extensions-Scribunto/
      lib.licenses.gpl2Plus
    ];
    maintainers = [ lib.maintainers.theobori ];
  };
}
+51 −0
Original line number Diff line number Diff line
# Packages `wiktextract` from a pinned commit of the upstream GitHub
# repository. The test suite is not run; only the import check is.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  unstableGitUpdater,

  # build system
  setuptools,

  # runtime dependencies
  levenshtein,
  nltk,
  pydantic,
  wikitextprocessor,
}:

buildPythonPackage {
  pname = "wiktextract";
  version = "1.99.7-unstable-2026-03-26";
  pyproject = true;

  # Pinned to an exact upstream commit.
  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wiktextract";
    rev = "f47b8fc87a0e17f4dcca68f534e73f4c6fa8e8e7";
    hash = "sha256-U9Xm3vRAvONN/DwyhEEM54eiBnv7JKAEXPolK9HfJU8=";
  };

  build-system = [ setuptools ];

  dependencies = [
    levenshtein
    nltk
    pydantic
    wikitextprocessor
  ];

  # The test suite needs Internet access, so it is skipped.
  doCheck = false;

  pythonImportsCheck = [ "wiktextract" ];

  passthru = {
    updateScript = unstableGitUpdater { };
  };

  meta = {
    description = "Wiktionary dump file parser and multilingual data extractor";
    homepage = "https://github.com/tatuylonen/wiktextract";
    license = [
      lib.licenses.mit
      # Covers certain test files that are under the Wiktionary licence
      lib.licenses.cc-by-sa-40
    ];
    mainProgram = "wiktwords";
    maintainers = [ lib.maintainers.theobori ];
  };
}
+6 −0
Original line number Diff line number Diff line
@@ -9666,6 +9666,8 @@ self: super: with self; {
  mediapy = callPackage ../development/python-modules/mediapy { };
  mediawiki-langcodes = callPackage ../development/python-modules/mediawiki-langcodes { };
  medpy = callPackage ../development/python-modules/medpy { };
  medvol = callPackage ../development/python-modules/medvol { };
@@ -21091,6 +21093,10 @@ self: super: with self; {
  wikitextparser = callPackage ../development/python-modules/wikitextparser { };
  wikitextprocessor = callPackage ../development/python-modules/wikitextprocessor { };
  wiktextract = callPackage ../development/python-modules/wiktextract { };
  willow = callPackage ../development/python-modules/willow { };
  winacl = callPackage ../development/python-modules/winacl { };