Unverified Commit ab82f5a5 authored by Diogo Correia's avatar Diogo Correia
Browse files

postgresqlPackages.vectorchord: init at 0.4.2

parent 136519ba
Loading
Loading
Loading
Loading
+29 −0
Original line number Diff line number Diff line
diff --git a/crates/simd/build.rs b/crates/simd/build.rs
index 12ce198..aed5588 100644
--- a/crates/simd/build.rs
+++ b/crates/simd/build.rs
@@ -17,17 +17,24 @@ use std::error::Error;
 
 fn main() -> Result<(), Box<dyn Error>> {
     println!("cargo::rerun-if-changed=cshim");
+    println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
     let target_arch = var("CARGO_CFG_TARGET_ARCH")?;
     match target_arch.as_str() {
         "aarch64" => {
             let mut build = cc::Build::new();
             build.file("./cshim/aarch64.c");
+            build.compiler("@clang@");
+            // read env var set by rustPlatform.bindgenHook
+            build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
             build.opt_level(3);
             build.compile("simd_cshim");
         }
         "x86_64" => {
             let mut build = cc::Build::new();
             build.file("./cshim/x86_64.c");
+            build.compiler("@clang@");
+            // read env var set by rustPlatform.bindgenHook
+            build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
             build.opt_level(3);
             build.compile("simd_cshim");
         }
+24 −0
Original line number Diff line number Diff line
diff --git a/crates/algorithm/src/lib.rs b/crates/algorithm/src/lib.rs
index 853a280..f88acbf 100644
--- a/crates/algorithm/src/lib.rs
+++ b/crates/algorithm/src/lib.rs
@@ -13,6 +13,7 @@
 // Copyright (c) 2025 TensorChord Inc.
 
 #![feature(select_unpredictable)]
+#![feature(let_chains)]
 #![allow(clippy::type_complexity)]
 
 mod build;
diff --git a/src/lib.rs b/src/lib.rs
index 654b4d1..2b11d03 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@
 // Copyright (c) 2025 TensorChord Inc.
 
 #![allow(unsafe_code)]
+#![feature(let_chains)]
 
 mod datatype;
 mod index;
+65 −0
Original line number Diff line number Diff line
diff --git a/crates/algorithm/src/operator.rs b/crates/algorithm/src/operator.rs
index 7de8d07..c496dcd 100644
--- a/crates/algorithm/src/operator.rs
+++ b/crates/algorithm/src/operator.rs
@@ -672,7 +672,7 @@ impl Operator for Op<VectOwned<f32>, L2> {
                 use std::iter::zip;
                 let dims = vector.dims();
                 let t = zip(&code.1, centroid.slice())
-                    .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
+                    .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
                     .sum::<f32>()
                     / (dims as f32).sqrt();
                 let sum_of_x_2 = code.0.dis_u_2;
@@ -763,7 +763,7 @@ impl Operator for Op<VectOwned<f32>, Dot> {
                 use std::iter::zip;
                 let dims = vector.dims();
                 let t = zip(&code.1, centroid.slice())
-                    .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
+                    .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
                     .sum::<f32>()
                     / (dims as f32).sqrt();
                 let sum_of_x_2 = code.0.dis_u_2;
@@ -854,7 +854,7 @@ impl Operator for Op<VectOwned<f16>, L2> {
                 use std::iter::zip;
                 let dims = vector.dims();
                 let t = zip(&code.1, centroid.slice())
-                    .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
+                    .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
                     .sum::<f32>()
                     / (dims as f32).sqrt();
                 let sum_of_x_2 = code.0.dis_u_2;
@@ -945,7 +945,7 @@ impl Operator for Op<VectOwned<f16>, Dot> {
                 use std::iter::zip;
                 let dims = vector.dims();
                 let t = zip(&code.1, centroid.slice())
-                    .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
+                    .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
                     .sum::<f32>()
                     / (dims as f32).sqrt();
                 let sum_of_x_2 = code.0.dis_u_2;
diff --git a/crates/simd/src/rotate.rs b/crates/simd/src/rotate.rs
index 7a211e5..0fcd955 100644
--- a/crates/simd/src/rotate.rs
+++ b/crates/simd/src/rotate.rs
@@ -31,18 +31,17 @@ pub fn givens(lhs: &mut [f32], rhs: &mut [f32]) {
 pub mod flip {
     #[crate::multiversion("v4", "v3", "v2", "a2")]
     pub fn flip(bits: &[u64; 1024], result: &mut [f32]) {
-        use std::hint::select_unpredictable;
         let result: &mut [u32] = unsafe { std::mem::transmute(result) };
         let (slice, remainder) = result.as_chunks_mut::<64>();
         let n = slice.len();
         assert!(n <= 1024);
         for i in 0..n {
             for j in 0..64 {
-                slice[i][j] ^= select_unpredictable((bits[i] & (1 << j)) != 0, 0x80000000, 0);
+                slice[i][j] ^= ((bits[i] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
             }
         }
         for j in 0..remainder.len() {
-            remainder[j] ^= select_unpredictable((bits[n] & (1 << j)) != 0, 0x80000000, 0);
+            remainder[j] ^= ((bits[n] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
         }
     }
 }
+146 −0
Original line number Diff line number Diff line
{
  buildPgrxExtension,
  cargo-pgrx_0_14_1,
  clang,
  fetchFromGitHub,
  lib,
  nix-update-script,
  postgresql,
  postgresqlTestExtension,
  replaceVars,
  rust-jemalloc-sys,
  stdenv,
}:
let
  # Variant of buildPgrxExtension that uses the cargo-pgrx release pinned below.
  buildPgrxExtension' = buildPgrxExtension.override {
    # Upstream only works with a fixed version of cargo-pgrx for each release,
    # so we're pinning it here to avoid future incompatibility.
    cargo-pgrx = cargo-pgrx_0_14_1;
  };

  # Follow upstream and use rust-jemalloc-sys on Linux aarch64 and x86_64 only.
  # https://github.com/tensorchord/VectorChord/blob/0.4.2/Cargo.toml#L43-L44
  # This flag gates whether rust-jemalloc-sys' is added to buildInputs.
  useSystemJemalloc =
    stdenv.hostPlatform.isLinux && (stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isx86_64);
  # rust-jemalloc-sys with its underlying jemalloc rebuilt with init exec TLS
  # disabled, since init exec TLS causes issues with postgres.
  rust-jemalloc-sys' = (
    rust-jemalloc-sys.override (old: {
      jemalloc = old.jemalloc.override { disableInitExecTls = true; };
    })
  );
in
# VectorChord 0.4.2 packaged as a pgrx-based PostgreSQL extension (the
# extension itself is named `vchord`), built with the pinned cargo-pgrx.
buildPgrxExtension' (finalAttrs: {
  inherit postgresql;

  pname = "vectorchord";
  version = "0.4.2";

  src = fetchFromGitHub {
    owner = "tensorchord";
    repo = "vectorchord";
    tag = finalAttrs.version;
    hash = "sha256-EdMuSNcWwCBsAY0e3d0WVug1KBWYWldvKStF6cf/uRs=";
  };

  patches = [
    # Tell the `simd` crate to use the flags from the rust bindgen hook.
    # The `@clang@` placeholder in the patch is substituted with the clang
    # executable path.
    (replaceVars ./0001-read-clang-flags-from-environment.diff {
      clang = lib.getExe clang;
    })
    # Add feature flags needed for features not yet stabilised in rustc stable
    ./0002-add-feature-flags.diff
    # The select_unpredictable function was moved from a method on `bool` to
    # std::hint before being stabilised.
    # This move isn't present in rustc 1.87, but upstream is using nightly so they have already updated their code.
    # This patch changes the code to call the method on `bool` instead.
    # See https://github.com/rust-lang/rust/pull/139726
    ./0003-select_unpredictable-on-bool.diff
  ];

  # Only link against the patched jemalloc on the platforms selected by
  # useSystemJemalloc; elsewhere no extra build inputs are added.
  buildInputs = lib.optionals (useSystemJemalloc) [
    rust-jemalloc-sys'
  ];

  useFetchCargoVendor = true;
  cargoHash = "sha256-8NwfsJn5dnvog3fexzLmO3v7/3+L7xtv+PHWfCCWoHY=";

  # Include upgrade scripts in the final package
  # https://github.com/tensorchord/VectorChord/blob/0.4.2/crates/make/src/main.rs#L224
  postInstall = ''
    cp sql/upgrade/* $out/share/postgresql/extension/
  '';

  env = {
    # Bypass rust nightly features not being available on rust stable
    RUSTC_BOOTSTRAP = 1;
  };

  # This crate does not have the "pg_test" feature
  usePgTestCheckFeature = false;

  passthru = {
    updateScript = nix-update-script { };

    # Smoke test: load the extension, build a vchordrq index over a tiny
    # table, then check the reported version and a few distance queries.
    tests.extension = postgresqlTestExtension {
      inherit (finalAttrs) finalPackage;
      withPackages = [ "pgvector" ]; # vectorchord depends on pgvector at runtime
      postgresqlExtraSettings = ''
        shared_preload_libraries = 'vchord'
      '';

      sql = ''
        CREATE EXTENSION vchord CASCADE;

        CREATE TABLE items (id bigint PRIMARY KEY, embedding vector(3));
        INSERT INTO items (id, embedding) VALUES
        (1, '[1,2,4]'),
        (2, '[1,2,5]'),
        (3, '[0,0,3]'),
        (4, '[0,0,2]'),
        (5, '[0,0,1]');

        CREATE INDEX ON items USING vchordrq (embedding vector_l2_ops) WITH (options = $$
        residual_quantization = true
        [build.internal]
        lists = [4096]
        spherical_centroids = false
        $$);

        SET vchordrq.probes = 1;
      '';

      asserts = [
        {
          query = "SELECT extversion FROM pg_extension WHERE extname = 'vchord'";
          expected = "'${finalAttrs.version}'";
          description = "Expected installed version to match the derivation's version";
        }
        {
          query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 1";
          expected = "1";
          description = "Expected vector of row with ID=1 to have an euclidean distance from [1,2,3] of 1.";
        }
        {
          query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 2";
          expected = "2";
          description = "Expected vector of row with ID=2 to have an euclidean distance from [1,2,3] of 2.";
        }
        {
          query = "SELECT id FROM items ORDER BY embedding <-> '[2,3,7]' LIMIT 1";
          expected = "2";
          description = "Expected vector of row with ID=2 to be the closest to [2,3,7].";
        }
      ];
    };
  };

  meta = {
    changelog = "https://github.com/tensorchord/VectorChord/releases/tag/${finalAttrs.version}";
    description = "Scalable, fast, and disk-friendly vector search in Postgres, the successor of pgvecto.rs";
    homepage = "https://github.com/tensorchord/VectorChord";
    license = lib.licenses.agpl3Only; # dual licensed with Elastic License v2 (ELv2)
    maintainers = with lib.maintainers; [
      diogotcorreia
    ];
    # Supported wherever the PostgreSQL package itself is supported.
    platforms = postgresql.meta.platforms;
  };
})