Unverified Commit e352a923 authored by TomaSajt's avatar TomaSajt
Browse files

rPackages: refactor code generation to output JSON

parent 7224c89e
Loading
Loading
Loading
Loading
+55 −51
Original line number Diff line number Diff line
#!/usr/bin/env Rscript

# This script can be used to generate the .json file for a given R package set
# that is part of the `rPackages` tree
#
# See R section of the nixpkgs manual for an example of how to use this script

library(data.table)
library(parallel)
library(BiocManager)
cl <- makeCluster(10)
library(jsonlite)

# always order strings according to POSIX ordering
locale <- Sys.setlocale(locale = "C")

biocVersion <- BiocManager:::.version_map()
biocVersion <- biocVersion[biocVersion$R == getRversion()[, 1:2],c("Bioc", "BiocStatus")]
@@ -20,21 +29,25 @@ mirrorUrls <- list( bioc=paste0("http://bioconductor.org/packages/", biocVersion

mirrorType <- commandArgs(trailingOnly=TRUE)[1]
stopifnot(mirrorType %in% names(mirrorUrls))
packagesFile <- paste(mirrorType, 'packages.nix', sep='-')
readFormatted <- as.data.table(read.table(skip=8, sep='"', text=head(readLines(packagesFile), -1)))

packagesFile <- paste(mirrorType, 'packages.json', sep='-')
prevPkgs <- fromJSON(packagesFile)$packages

write(paste("downloading package lists"), stderr())
knownPackages <- lapply(mirrorUrls, function(url) as.data.table(available.packages(url, filters=c("R_version", "OS_type", "duplicates")), method="libcurl"))
pkgs <- knownPackages[mirrorType][[1]]
setkey(pkgs, Package)
knownPackages <- c(unique(do.call("rbind", knownPackages)$Package))
knownPackages <- sapply(knownPackages, gsub, pattern=".", replacement="_", fixed=TRUE)
pkgTables <- lapply(mirrorUrls, function(url) as.data.table(available.packages(url, filters=c("R_version", "OS_type", "duplicates")), method="libcurl"))
knownPackageNames <- c(unique(do.call("rbind", pkgTables)$Package))

pkgTable <- pkgTables[mirrorType][[1]]
mirrorUrl <- mirrorUrls[mirrorType][[1]]

escapeName <- function(name) {
    gsub(".", "_", switch(name, "import" = "r_import", "assert" = "r_assert", name), fixed=TRUE)
}

nixPrefetch <- function(name, version) {
  prevV <- readFormatted$V2 == name & readFormatted$V4 == version
  if (sum(prevV) == 1)
    as.character(readFormatted$V6[ prevV ])
  prevPkg <- prevPkgs[[escapeName(name)]]
  if (!is.null(prevPkg) && prevPkg$version == version)
    prevPkg$sha256

  else {
    # avoid nix-prefetch-url because it often fails to fetch/hash large files
@@ -53,12 +66,7 @@ nixPrefetch <- function(name, version) {

}

escapeName <- function(name) {
    switch(name, "import" = "r_import", "assert" = "r_assert", name)
}

formatPackage <- function(name, version, sha256, depends, imports, linkingTo) {
    attr <- gsub(".", "_", escapeName(name), fixed=TRUE)
    options(warn=5)
    depends <- paste( if (is.na(depends)) "" else gsub("[ \t\n]+", "", depends)
                    , if (is.na(imports)) "" else gsub("[ \t\n]+", "", imports)
@@ -67,48 +75,44 @@ formatPackage <- function(name, version, sha256, depends, imports, linkingTo) {
                    )
    depends <- unlist(strsplit(depends, split=",", fixed=TRUE))
    depends <- lapply(depends, gsub, pattern="([^ \t\n(]+).*", replacement="\\1")
    depends <- lapply(depends, gsub, pattern=".", replacement="_", fixed=TRUE)
    depends <- depends[depends %in% knownPackages]
    depends <- depends[depends %in% knownPackageNames]
    depends <- lapply(depends, escapeName)
    depends <- paste(depends)
    depends <- paste(sort(unique(depends)), collapse=" ")
    paste0("  ", attr, " = derive2 { name=\"", name, "\"; version=\"", version, "\"; sha256=\"", sha256, "\"; depends=[", depends, "]; };")
    depends <- sort(unique(depends))
    list(name=unbox(name), version=unbox(version), sha256=unbox(sha256), depends=depends)
}

clusterExport(cl, c("nixPrefetch","readFormatted", "mirrorUrl", "mirrorType", "knownPackages"))

pkgs <- pkgs[order(Package)]
cl <- makeCluster(10)
clusterExport(cl, c("escapeName", "nixPrefetch", "prevPkgs", "mirrorUrl", "mirrorType", "knownPackageNames"))

write(paste("updating", mirrorType, "packages"), stderr())
pkgs$sha256 <- parApply(cl, pkgs, 1, function(p) nixPrefetch(p[1], p[2]))
nix <- apply(pkgs, 1, function(p) formatPackage(p[1], p[2], p[18], p[4], p[5], p[6]))
write("done", stderr())
pkgTable$sha256 <- parApply(cl, pkgTable, 1, function(p) nixPrefetch(p[1], p[2]))

stopCluster(cl)

pkgs <- lapply(1:nrow(pkgTable), function(i) with(pkgTable[i,], formatPackage(Package, Version, sha256, Depends, Imports, LinkingTo)))
names(pkgs) <- lapply(pkgs, function(p) escapeName(p$name))

# Mark deleted packages as broken
setkey(readFormatted, V2)
markBroken <- function(name) {
  str <- paste0(readFormatted[name], collapse='"')
  if(sum(grep("broken = true;", str)))
    return(str)
  write(paste("marked", name, "as broken"), stderr())
  gsub("};$", "broken = true; };", str)
brokenPkgs <- lapply(prevPkgs[setdiff(names(prevPkgs), names(pkgs))], function(p)
  list(name=unbox(p$name),
       version=unbox(p$version),
       sha256=unbox(p$sha256),
       depends=p$depends,
       broken=unbox(T)))

# sort packages by their non-escaped names
pkgs <- pkgs[order(sapply(pkgs, function(p) p$name))]
brokenPkgs<- brokenPkgs[order(sapply(brokenPkgs, function(p) p$name))]

# empty named list
extraArgs = setNames(list(), character(0))

if (mirrorType != "cran") {
  extraArgs=list(biocVersion=unbox(paste(biocVersion)))
}
broken <- lapply(setdiff(readFormatted[[2]], pkgs[[1]]), markBroken)

cat("# This file is generated from generate-r-packages.R. DO NOT EDIT.\n")
cat("# Execute the following command to update the file.\n")
cat("#\n")
cat(paste("# Rscript generate-r-packages.R", mirrorType, ">new && mv new", packagesFile))
cat("\n\n")
cat("{ self, derive }:\n")
cat("let derive2 = derive ")
if (mirrorType == "cran") { cat("{  }")
} else if (mirrorType == "irkernel") { cat("{}")
} else { cat("{ biocVersion = \"", biocVersion, "\"; }", sep="") }
cat(";\n")
cat("in with self; {\n")
cat(paste(nix, collapse="\n"), "\n", sep="")
cat(paste(broken, collapse="\n"), "\n", sep="")
cat("}\n")

stopCluster(cl)
cat(toJSON(list(extraArgs=extraArgs, packages=c(pkgs, brokenPkgs)), pretty=TRUE))
cat("\n")
write("done", stderr())
+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ stdenv.mkDerivation {
        data_table
        parallel
        BiocManager
        jsonlite
      ];
    })
  ];