Mirror of https://github.com/hl-archive-node/nanoreth.git, synced 2025-12-06 10:59:55 +00:00
chore: remove phf from static files (#10259)
Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
Co-authored-by: Matthias Seitz <matthias.seitz@outlook.de>

This change removes the perfect-hashing-function (PHF) machinery from the static-file format: the `ph` dependency, the `phf` module with the `Fmph`/`GoFmph` wrappers, the key-based cursor queries (`row_by_key`, `row_by_key_with_cols`) and `prepare_index`. An uninhabited `Functions` enum keeps the serialized `phf: Option<Functions>` field decodable for backwards compatibility.
@@ -19,7 +19,6 @@ name = "reth_nippy_jar"
 reth-fs-util.workspace = true
 
 # filter
-ph = "0.8.0"
 cuckoofilter = { version = "0.5.0", features = [
     "serde_support",
     "serde_bytes",
@@ -1,10 +1,8 @@
 use crate::{
     compression::{Compression, Compressors, Zstd},
-    DataReader, InclusionFilter, NippyJar, NippyJarError, NippyJarHeader, PerfectHashingFunction,
-    RefRow,
+    DataReader, NippyJar, NippyJarError, NippyJarHeader, RefRow,
 };
 use std::{ops::Range, sync::Arc};
-use sucds::int_vectors::Access;
 use zstd::bulk::Decompressor;
 
 /// Simple cursor implementation to retrieve data from [`NippyJar`].
@@ -67,35 +65,6 @@ impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
         self.row = 0;
     }
 
-    /// Returns a row, searching it by a key.
-    ///
-    /// **May return false positives.**
-    ///
-    /// Example usage would be querying a transactions file with a transaction hash which is **NOT**
-    /// stored in file.
-    pub fn row_by_key(&mut self, key: &[u8]) -> Result<Option<RefRow<'_>>, NippyJarError> {
-        if let (Some(filter), Some(phf)) = (&self.jar.filter, &self.jar.phf) {
-            // TODO: is it worth to parallelize both?
-
-            // May have false positives
-            if filter.contains(key)? {
-                // May have false positives
-                if let Some(row_index) = phf.get_index(key)? {
-                    self.row = self
-                        .jar
-                        .offsets_index
-                        .access(row_index as usize)
-                        .expect("built from same set") as u64;
-                    return self.next_row()
-                }
-            }
-        } else {
-            return Err(NippyJarError::UnsupportedFilterQuery)
-        }
-
-        Ok(None)
-    }
-
     /// Returns a row by its number.
     pub fn row_by_number(&mut self, row: usize) -> Result<Option<RefRow<'_>>, NippyJarError> {
         self.row = row as u64;
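
The removed `row_by_key` was the by-hash entry point, and its docs warn that both the filter and the PHF can report false positives. A minimal sketch of the verification callers were expected to perform on a hit; the import path and the first-column-stores-the-key layout are assumptions for illustration, not confirmed crate API:

```rust
use reth_nippy_jar::{NippyJarCursor, NippyJarError, NippyJarHeader};

/// Sketch only: assumes the jar's first column stores the lookup key itself
/// (as a transactions file keyed by tx hash would), so a hit can be confirmed.
fn lookup_verified<H: NippyJarHeader>(
    cursor: &mut NippyJarCursor<'_, H>,
    key: &[u8],
) -> Result<Option<Vec<Vec<u8>>>, NippyJarError> {
    Ok(cursor
        .row_by_key(key)?
        // `row_by_key` may alias an unknown key onto some other row
        // (filter + PHF false positives), so compare the stored key.
        .filter(|row| row[0] == key)
        // Copy the borrowed columns out so the cursor can be reused.
        .map(|row| row.iter().map(|col| col.to_vec()).collect()))
}
```
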
@@ -130,40 +99,6 @@ impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
         ))
     }
 
-    /// Returns a row, searching it by a key using a
-    /// `mask` to only read certain columns from the row.
-    ///
-    /// **May return false positives.**
-    ///
-    /// Example usage would be querying a transactions file with a transaction hash which is **NOT**
-    /// stored in file.
-    pub fn row_by_key_with_cols(
-        &mut self,
-        key: &[u8],
-        mask: usize,
-    ) -> Result<Option<RefRow<'_>>, NippyJarError> {
-        if let (Some(filter), Some(phf)) = (&self.jar.filter, &self.jar.phf) {
-            // TODO: is it worth to parallelize both?
-
-            // May have false positives
-            if filter.contains(key)? {
-                // May have false positives
-                if let Some(row_index) = phf.get_index(key)? {
-                    self.row = self
-                        .jar
-                        .offsets_index
-                        .access(row_index as usize)
-                        .expect("built from same set") as u64;
-                    return self.next_row_with_cols(mask)
-                }
-            }
-        } else {
-            return Err(NippyJarError::UnsupportedFilterQuery)
-        }
-
-        Ok(None)
-    }
-
     /// Returns a row by its number by using a `mask` to only read certain columns from the row.
     pub fn row_by_number_with_cols(
         &mut self,
@@ -31,10 +31,6 @@ pub enum NippyJarError {
     FilterMaxCapacity,
     #[error("cuckoo was not properly initialized after loaded")]
     FilterCuckooNotLoaded,
-    #[error("perfect hashing function doesn't have any keys added")]
-    PHFMissingKeys,
-    #[error("nippy jar initialized without perfect hashing function")]
-    PHFMissing,
     #[error("nippy jar was built without an index")]
     UnsupportedFilterQuery,
     #[error("the size of an offset must be at most 8 bytes, got {offset_size}")]
@@ -32,9 +32,10 @@ pub mod compression;
 use compression::Compression;
 use compression::Compressors;
 
-pub mod phf;
-pub use phf::PHFKey;
-use phf::{Fmph, Functions, GoFmph, PerfectHashingFunction};
+/// empty enum for backwards compatibility
+#[derive(Debug, Serialize, Deserialize)]
+#[cfg_attr(test, derive(PartialEq, Eq))]
+pub enum Functions {}
 
 mod error;
 pub use error::NippyJarError;
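
Replacing the old enum with an uninhabited one preserves on-disk compatibility: `Option<Functions>` keeps a decodable layout, and existing configs, which only ever stored `None` here, still deserialize while the variants become unconstructible. A minimal sketch of the idea with serde and bincode; the names are illustrative, not taken from the codebase:

```rust
use serde::{Deserialize, Serialize};

/// Stand-in for the retired PHF variants: it has no values, so it can never
/// be constructed, but `Option<Legacy>` still (de)serializes.
#[derive(Debug, Serialize, Deserialize)]
enum Legacy {}

#[derive(Debug, Serialize, Deserialize)]
struct Config {
    phf: Option<Legacy>,
}

fn main() {
    // Old files only ever contained `None` for this field, so decoding
    // round-trips fine even though `Legacy` is uninhabited.
    let bytes = bincode::serialize(&Config { phf: None }).unwrap();
    let decoded: Config = bincode::deserialize(&bytes).unwrap();
    assert!(decoded.phf.is_none());
}
```
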
@@ -74,24 +75,6 @@ impl<T> NippyJarHeader for T where
 ///
 /// Data is organized into a columnar format, enabling column-based compression. Data retrieval
 /// entails consulting an offset list and fetching the data from file via `mmap`.
-///
-/// PHF & Filters:
-/// For data membership verification, the `filter` field can be configured with algorithms like
-/// Bloom or Cuckoo filters. While these filters enable rapid membership checks, it's important to
-/// note that **they may yield false positives but not false negatives**. Therefore, they serve as
-/// preliminary checks (eg. in `by_hash` queries) and should be followed by data verification on
-/// retrieval.
-///
-/// The `phf` (Perfect Hashing Function) and `offsets_index` fields facilitate the data retrieval
-/// process in for example `by_hash` queries. Specifically, the PHF converts a query, such as a
-/// block hash, into a unique integer. This integer is then used as an index in `offsets_index`,
-/// which maps to the actual data location in the `offsets` list. Similar to the `filter`, the PHF
-/// may also produce false positives but not false negatives, necessitating subsequent data
-/// verification.
-///
-/// Note: that the key (eg. `BlockHash`) passed to a filter and phf does not need to actually be
-/// stored.
-///
 /// Ultimately, the `freeze` function yields two files: a data file containing both the data and its
 /// configuration, and an index file that houses the offsets and `offsets_index`.
 #[derive(Serialize, Deserialize)]
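
The removed paragraphs describe a two-stage lookup: a probabilistic filter pre-check, then a PHF mapping the key to a slot in `offsets_index`. A toy model of that flow, with plain containers standing in for the real cuckoo filter, fmph PHF and Elias-Fano index; nothing here is the crate's API:

```rust
use std::collections::HashSet;

/// Toy model of the removed design: filter pre-check, then
/// PHF -> dense integer -> `offsets_index` -> row number.
struct ToyJar {
    filter: HashSet<Vec<u8>>, // stand-in for the cuckoo filter
    phf: fn(&[u8]) -> usize,  // stand-in for fmph: key -> dense integer
    offsets_index: Vec<u64>,  // PHF output -> row number
}

impl ToyJar {
    fn row_number(&self, key: &[u8]) -> Option<u64> {
        // Cheap membership pre-check; the real filter may false-positive.
        if !self.filter.contains(key) {
            return None;
        }
        // A foreign key that slips past the filter can alias another row,
        // which is why callers must verify the fetched data afterwards.
        Some(self.offsets_index[(self.phf)(key)])
    }
}

fn toy_phf(key: &[u8]) -> usize {
    key[0] as usize % 2 // NOT a real perfect hash; illustration only
}

fn main() {
    let mut filter = HashSet::new();
    filter.insert(b"a".to_vec());
    let jar = ToyJar { filter, phf: toy_phf, offsets_index: vec![42, 7] };
    assert_eq!(jar.row_number(b"a"), Some(7)); // b'a' = 97 -> slot 1 -> row 7
    assert_eq!(jar.row_number(b"b"), None);    // rejected by the filter
}
```
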
@@ -112,7 +95,7 @@ pub struct NippyJar<H = ()> {
     /// Optional filter function for data membership checks.
     filter: Option<InclusionFilters>,
     #[serde(skip)]
-    /// Optional Perfect Hashing Function (PHF) for unique offset mapping.
+    /// Optional field for backwards compatibility
     phf: Option<Functions>,
     /// Index mapping PHF output to value offsets in `offsets`.
     #[serde(skip)]
@@ -196,18 +179,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
         self
     }
 
-    /// Adds [`phf::Fmph`] perfect hashing function.
-    pub fn with_fmph(mut self) -> Self {
-        self.phf = Some(Functions::Fmph(Fmph::new()));
-        self
-    }
-
-    /// Adds [`phf::GoFmph`] perfect hashing function.
-    pub fn with_gofmph(mut self) -> Self {
-        self.phf = Some(Functions::GoFmph(GoFmph::new()));
-        self
-    }
-
     /// Gets a reference to the user header.
     pub const fn user_header(&self) -> &H {
         &self.user_header
@@ -346,16 +317,6 @@ impl<H: NippyJarHeader> InclusionFilter for NippyJar<H> {
     }
 }
 
-impl<H: NippyJarHeader> PerfectHashingFunction for NippyJar<H> {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.phf.as_mut().ok_or(NippyJarError::PHFMissing)?.set_keys(keys)
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        self.phf.as_ref().ok_or(NippyJarError::PHFMissing)?.get_index(key)
-    }
-}
-
 #[cfg(test)]
 impl<H: NippyJarHeader> NippyJar<H> {
     /// If required, prepares any compression algorithm to an early pass of the data.
@@ -371,55 +332,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
         Ok(())
     }
 
-    /// Prepares beforehand the offsets index for querying rows based on `values` (eg. transaction
-    /// hash). Expects `values` to be sorted in the same way as the data that is going to be
-    /// later on inserted.
-    ///
-    /// Currently collecting all items before acting on them.
-    pub fn prepare_index<T: PHFKey>(
-        &mut self,
-        values: impl IntoIterator<Item = ColumnResult<T>>,
-        row_count: usize,
-    ) -> Result<(), NippyJarError> {
-        debug!(target: "nippy-jar", ?row_count, "Preparing index.");
-
-        let values = values.into_iter().collect::<Result<Vec<_>, _>>()?;
-
-        debug_assert!(
-            row_count == values.len(),
-            "Row count ({row_count}) differs from value list count ({}).",
-            values.len()
-        );
-
-        let mut offsets_index = vec![0; row_count];
-
-        // Builds perfect hashing function from the values
-        if let Some(phf) = self.phf.as_mut() {
-            debug!(target: "nippy-jar", ?row_count, values_count = ?values.len(), "Setting keys for perfect hashing function.");
-            phf.set_keys(&values)?;
-        }
-
-        if self.filter.is_some() || self.phf.is_some() {
-            debug!(target: "nippy-jar", ?row_count, "Creating filter and offsets_index.");
-
-            for (row_num, v) in values.into_iter().enumerate() {
-                if let Some(filter) = self.filter.as_mut() {
-                    filter.add(v.as_ref())?;
-                }
-
-                if let Some(phf) = self.phf.as_mut() {
-                    // Points to the first column value offset of the row.
-                    let index = phf.get_index(v.as_ref())?.expect("initialized") as usize;
-                    let _ = std::mem::replace(&mut offsets_index[index], row_num as u64);
-                }
-            }
-        }
-
-        debug!(target: "nippy-jar", ?row_count, "Encoding offsets index list.");
-        self.offsets_index = PrefixSummedEliasFano::from_slice(&offsets_index)?;
-        Ok(())
-    }
-
     /// Writes all data and configuration to a file and the offset index to another.
     pub fn freeze(
         self,
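
The core of the removed method is the inversion loop: each build key's PHF output becomes a slot in `offsets_index`, and the row number is stored there. A self-contained sketch of that step, with a plain `Vec` and a toy closure standing in for the PHF and `PrefixSummedEliasFano`:

```rust
/// Sketch of the inversion the removed `prepare_index` performed.
/// `phf` must map every build key to a distinct slot in 0..keys.len().
fn build_offsets_index(keys: &[&[u8]], phf: impl Fn(&[u8]) -> usize) -> Vec<u64> {
    let mut offsets_index = vec![0u64; keys.len()];
    for (row_num, &key) in keys.iter().enumerate() {
        // The PHF's dense output becomes the slot, the row number the value,
        // so queries can later map key -> PHF index -> row.
        offsets_index[phf(key)] = row_num as u64;
    }
    offsets_index
}

fn main() {
    // Toy injective map over exactly these three keys (a real fmph
    // guarantees this property for the whole build set).
    let phf = |k: &[u8]| (k[0] - b'x') as usize;
    let keys: Vec<&[u8]> = vec![b"z", b"x", b"y"];
    // Row 0 holds "z" (slot 2), row 1 "x" (slot 0), row 2 "y" (slot 1).
    assert_eq!(build_offsets_index(&keys, phf), vec![1, 2, 0]);
}
```
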
@@ -447,7 +359,7 @@ impl<H: NippyJarHeader> NippyJar<H> {
         Ok(writer.into_jar())
     }
 
-    /// Freezes [`PerfectHashingFunction`], [`InclusionFilter`] and the offset index to file.
+    /// Freezes [`InclusionFilter`] and the offset index to file.
     fn freeze_filters(&self) -> Result<(), NippyJarError> {
         debug!(target: "nippy-jar", path=?self.index_path(), "Writing offsets and offsets index to file.");
 
@@ -474,11 +386,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
             }
         }
 
-        // Check `prepare_index` was called.
-        if let Some(phf) = &self.phf {
-            let _ = phf.get_index(&[])?;
-        }
-
         Ok(())
     }
 }
@@ -588,7 +495,7 @@ mod tests {
     use super::*;
     use compression::Compression;
     use rand::{rngs::SmallRng, seq::SliceRandom, RngCore, SeedableRng};
-    use std::{collections::HashSet, fs::OpenOptions};
+    use std::{fs::OpenOptions, io::Read};
 
     type ColumnResults<T> = Vec<ColumnResult<T>>;
     type ColumnValues = Vec<Vec<u8>>;
@@ -617,57 +524,30 @@ mod tests {
     }
 
     #[test]
-    fn test_phf() {
-        let (col1, col2) = test_data(None);
-        let num_columns = 2;
-        let num_rows = col1.len() as u64;
-        let file_path = tempfile::NamedTempFile::new().unwrap();
-
-        let create_nippy = || -> NippyJar<()> {
-            let mut nippy = NippyJar::new_without_header(num_columns, file_path.path());
-            assert!(matches!(
-                NippyJar::set_keys(&mut nippy, &col1),
-                Err(NippyJarError::PHFMissing)
-            ));
-            nippy
-        };
-
-        let check_phf = |mut nippy: NippyJar<_>| {
-            assert!(matches!(
-                NippyJar::get_index(&nippy, &col1[0]),
-                Err(NippyJarError::PHFMissingKeys)
-            ));
-            assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok());
-
-            let collect_indexes = |nippy: &NippyJar<_>| -> Vec<u64> {
-                col1.iter()
-                    .map(|value| NippyJar::get_index(nippy, value.as_slice()).unwrap().unwrap())
-                    .collect()
-            };
-
-            // Ensure all indexes are unique
-            let indexes = collect_indexes(&nippy);
-            assert_eq!(indexes.iter().collect::<HashSet<_>>().len(), indexes.len());
-
-            // Ensure reproducibility
-            assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok());
-            assert_eq!(indexes, collect_indexes(&nippy));
-
-            // Ensure that loaded phf provides the same function outputs
-            nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
-            nippy
-                .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
-                .unwrap();
-            let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
-            loaded_nippy.load_filters().unwrap();
-            assert_eq!(indexes, collect_indexes(&loaded_nippy));
-        };
-
-        // fmph bytes size for 100 values of 32 bytes: 54
-        check_phf(create_nippy().with_fmph());
-
-        // fmph bytes size for 100 values of 32 bytes: 46
-        check_phf(create_nippy().with_gofmph());
+    fn test_config_serialization() {
+        let file = tempfile::NamedTempFile::new().unwrap();
+        let jar = NippyJar::new_without_header(23, file.path()).with_lz4();
+        jar.freeze_config().unwrap();
+
+        let mut config_file = OpenOptions::new().read(true).open(jar.config_path()).unwrap();
+        let config_file_len = config_file.metadata().unwrap().len();
+        assert_eq!(config_file_len, 37);
+
+        let mut buf = Vec::with_capacity(config_file_len as usize);
+        config_file.read_to_end(&mut buf).unwrap();
+
+        assert_eq!(
+            vec![
+                1, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            ],
+            buf
+        );
+
+        let mut read_jar = bincode::deserialize_from::<_, NippyJar>(&buf[..]).unwrap();
+        // Path is not ser/de
+        read_jar.path = file.path().to_path_buf();
+        assert_eq!(jar, read_jar);
     }
 
     #[test]
@@ -891,11 +771,9 @@ mod tests {
         let mut nippy =
             NippyJar::new(num_columns, file_path.path(), BlockJarHeader { block_start })
                 .with_zstd(true, 5000)
-                .with_cuckoo_filter(col1.len())
-                .with_fmph();
+                .with_cuckoo_filter(col1.len());
 
         nippy.prepare_compression(data.clone()).unwrap();
-        nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
         nippy
             .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
             .unwrap();
@@ -908,7 +786,6 @@ mod tests {
 
         assert!(loaded_nippy.compressor().is_some());
         assert!(loaded_nippy.filter.is_some());
-        assert!(loaded_nippy.phf.is_some());
         assert_eq!(loaded_nippy.user_header().block_start, block_start);
 
         if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() {
@@ -929,22 +806,9 @@ mod tests {
         data.shuffle(&mut rand::thread_rng());
 
         for (row_num, (v0, v1)) in data {
-            // Simulates `by_hash` queries by iterating col1 values, which were used to
-            // create the inner index.
-            {
-                let row_by_value = cursor
-                    .row_by_key(v0)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!((&row_by_value[0], &row_by_value[1]), (v0, v1));
-
-                // Simulates `by_number` queries
-                let row_by_num = cursor.row_by_number(row_num).unwrap().unwrap();
-                assert_eq!(row_by_value, row_by_num);
-            }
+            // Simulates `by_number` queries
+            let row_by_num = cursor.row_by_number(row_num).unwrap().unwrap();
+            assert_eq!((&row_by_num[0].to_vec(), &row_by_num[1].to_vec()), (v0, v1));
         }
     }
 }
@@ -962,11 +826,9 @@ mod tests {
         {
             let mut nippy = NippyJar::new_without_header(num_columns, file_path.path())
                 .with_zstd(true, 5000)
-                .with_cuckoo_filter(col1.len())
-                .with_fmph();
+                .with_cuckoo_filter(col1.len());
 
             nippy.prepare_compression(data).unwrap();
-            nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
             nippy
                 .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
                 .unwrap();
@@ -989,84 +851,41 @@ mod tests {
 
             // Read both columns
             for (row_num, (v0, v1)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_FULL_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!((&row_by_value[0], &row_by_value[1]), (*v0, *v1));
-
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_FULL_MASK)
                     .unwrap()
                     .unwrap();
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!((&row_by_num[0].to_vec(), &row_by_num[1].to_vec()), (*v0, *v1));
             }
 
             // Read first column only: `Block`
             const BLOCKS_BLOCK_MASK: usize = 0b01;
             for (row_num, (v0, _)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_BLOCK_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!(row_by_value.len(), 1);
-                assert_eq!(&row_by_value[0], *v0);
-
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_BLOCK_MASK)
                     .unwrap()
                     .unwrap();
                 assert_eq!(row_by_num.len(), 1);
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!(&row_by_num[0].to_vec(), *v0);
             }
 
             // Read second column only: `Block`
             const BLOCKS_WITHDRAWAL_MASK: usize = 0b10;
-            for (row_num, (v0, v1)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_WITHDRAWAL_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!(row_by_value.len(), 1);
-                assert_eq!(&row_by_value[0], *v1);
-
+            for (row_num, (_, v1)) in &data {
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_WITHDRAWAL_MASK)
                     .unwrap()
                     .unwrap();
                 assert_eq!(row_by_num.len(), 1);
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!(&row_by_num[0].to_vec(), *v1);
             }
 
             // Read nothing
             const BLOCKS_EMPTY_MASK: usize = 0b00;
-            for (row_num, (v0, _)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                assert!(cursor
-                    .row_by_key_with_cols(v0, BLOCKS_EMPTY_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .is_empty());
-
+            for (row_num, _) in &data {
                 // Simulates `by_number` queries
                 assert!(cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_EMPTY_MASK)
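
The column masks used in this test are plain bit flags: bit i selects column i of the row, so `0b01` keeps only the first column and `0b00` yields an empty row. A toy model of that selection (hypothetical helper, not the crate's implementation):

```rust
/// Keep only the columns whose bit is set in `mask` (bit i = column i).
fn select_columns(row: &[Vec<u8>], mask: usize) -> Vec<Vec<u8>> {
    row.iter()
        .enumerate()
        .filter(|(i, _)| mask & (1 << i) != 0)
        .map(|(_, col)| col.clone())
        .collect()
}

fn main() {
    let row = vec![b"block".to_vec(), b"withdrawal".to_vec()];
    assert_eq!(select_columns(&row, 0b01), vec![b"block".to_vec()]);
    assert_eq!(select_columns(&row, 0b10), vec![b"withdrawal".to_vec()]);
    assert!(select_columns(&row, 0b00).is_empty());
}
```
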
@@ -1,99 +0,0 @@
-use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
-use ph::fmph::{BuildConf, Function};
-use serde::{
-    de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
-    Serializer,
-};
-
-/// Wrapper struct for [`Function`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
-#[derive(Default)]
-pub struct Fmph {
-    function: Option<Function>,
-}
-
-impl Fmph {
-    pub const fn new() -> Self {
-        Self { function: None }
-    }
-}
-
-impl PerfectHashingFunction for Fmph {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.function = Some(Function::from_slice_with_conf(
-            keys,
-            BuildConf { use_multiple_threads: true, ..Default::default() },
-        ));
-        Ok(())
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        if let Some(f) = &self.function {
-            return Ok(f.get(key))
-        }
-        Err(NippyJarError::PHFMissingKeys)
-    }
-}
-
-#[cfg(test)]
-impl PartialEq for Fmph {
-    fn eq(&self, _other: &Self) -> bool {
-        match (&self.function, &_other.function) {
-            (Some(func1), Some(func2)) => {
-                func1.level_sizes() == func2.level_sizes() &&
-                    func1.write_bytes() == func2.write_bytes() &&
-                    {
-                        let mut f1 = Vec::with_capacity(func1.write_bytes());
-                        func1.write(&mut f1).expect("enough capacity");
-
-                        let mut f2 = Vec::with_capacity(func2.write_bytes());
-                        func2.write(&mut f2).expect("enough capacity");
-
-                        f1 == f2
-                    }
-            }
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl std::fmt::Debug for Fmph {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Fmph")
-            .field("bytes_size", &self.function.as_ref().map(|f| f.write_bytes()))
-            .finish_non_exhaustive()
-    }
-}
-
-impl Serialize for Fmph {
-    /// Potentially expensive, but should be used only when creating the file.
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        match &self.function {
-            Some(f) => {
-                let mut v = Vec::with_capacity(f.write_bytes());
-                f.write(&mut v).map_err(S::Error::custom)?;
-                serializer.serialize_some(&v)
-            }
-            None => serializer.serialize_none(),
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for Fmph {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        if let Some(buffer) = <Option<Vec<u8>>>::deserialize(deserializer)? {
-            return Ok(Self {
-                function: Some(
-                    Function::read(&mut std::io::Cursor::new(buffer)).map_err(D::Error::custom)?,
-                ),
-            })
-        }
-        Ok(Self { function: None })
-    }
-}
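
The deleted wrapper delegated to the `ph` crate's fingerprint-based minimal perfect hashing. A standalone sketch of the underlying calls the file used (`Function::from_slice_with_conf` and `Function::get`, as in the removed code); the key set and sizes are illustrative, and the exact generic bounds are per the `ph` 0.8 docs:

```rust
use ph::fmph::{BuildConf, Function};
use std::collections::HashSet;

fn main() {
    // Build over a fixed key set, with the same configuration the removed
    // wrapper passed (multi-threaded build).
    let keys: Vec<Vec<u8>> = (0u8..100).map(|i| vec![i; 32]).collect();
    let f = Function::from_slice_with_conf(
        &keys,
        BuildConf { use_multiple_threads: true, ..Default::default() },
    );

    // Every build key maps to Some(distinct integer) in 0..keys.len().
    let mut seen = HashSet::new();
    for key in &keys {
        let idx = f.get(key).expect("build keys always resolve");
        assert!(idx < keys.len() as u64 && seen.insert(idx));
    }

    // A key outside the build set may still return Some(_): the false
    // positives the removed cursor documentation warned about.
    let _maybe_alias = f.get(&vec![255u8; 32]);
}
```
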
@@ -1,100 +0,0 @@
-use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
-use ph::fmph::{GOBuildConf, GOFunction};
-use serde::{
-    de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
-    Serializer,
-};
-
-/// Wrapper struct for [`GOFunction`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
-#[derive(Default)]
-pub struct GoFmph {
-    function: Option<GOFunction>,
-}
-
-impl GoFmph {
-    pub const fn new() -> Self {
-        Self { function: None }
-    }
-}
-
-impl PerfectHashingFunction for GoFmph {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.function = Some(GOFunction::from_slice_with_conf(
-            keys,
-            GOBuildConf { use_multiple_threads: true, ..Default::default() },
-        ));
-        Ok(())
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        if let Some(f) = &self.function {
-            return Ok(f.get(key))
-        }
-        Err(NippyJarError::PHFMissingKeys)
-    }
-}
-
-#[cfg(test)]
-impl PartialEq for GoFmph {
-    fn eq(&self, other: &Self) -> bool {
-        match (&self.function, &other.function) {
-            (Some(func1), Some(func2)) => {
-                func1.level_sizes() == func2.level_sizes() &&
-                    func1.write_bytes() == func2.write_bytes() &&
-                    {
-                        let mut f1 = Vec::with_capacity(func1.write_bytes());
-                        func1.write(&mut f1).expect("enough capacity");
-
-                        let mut f2 = Vec::with_capacity(func2.write_bytes());
-                        func2.write(&mut f2).expect("enough capacity");
-
-                        f1 == f2
-                    }
-            }
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl std::fmt::Debug for GoFmph {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("GoFmph")
-            .field("bytes_size", &self.function.as_ref().map(|f| f.write_bytes()))
-            .finish_non_exhaustive()
-    }
-}
-
-impl Serialize for GoFmph {
-    /// Potentially expensive, but should be used only when creating the file.
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        match &self.function {
-            Some(f) => {
-                let mut v = Vec::with_capacity(f.write_bytes());
-                f.write(&mut v).map_err(S::Error::custom)?;
-                serializer.serialize_some(&v)
-            }
-            None => serializer.serialize_none(),
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for GoFmph {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        if let Some(buffer) = <Option<Vec<u8>>>::deserialize(deserializer)? {
-            return Ok(Self {
-                function: Some(
-                    GOFunction::read(&mut std::io::Cursor::new(buffer))
-                        .map_err(D::Error::custom)?,
-                ),
-            })
-        }
-        Ok(Self { function: None })
-    }
-}
@@ -1,46 +0,0 @@
-use crate::NippyJarError;
-use serde::{Deserialize, Serialize};
-use std::hash::Hash;
-
-mod fmph;
-pub use fmph::Fmph;
-
-mod go_fmph;
-pub use go_fmph::GoFmph;
-
-/// Trait alias for [`PerfectHashingFunction`] keys.
-pub trait PHFKey: AsRef<[u8]> + Sync + Clone + Hash {}
-impl<T: AsRef<[u8]> + Sync + Clone + Hash> PHFKey for T {}
-
-/// Trait to build and query a perfect hashing function.
-pub trait PerfectHashingFunction: Serialize + for<'a> Deserialize<'a> {
-    /// Adds the key set and builds the perfect hashing function.
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError>;
-
-    /// Get corresponding associated integer. There might be false positives.
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError>;
-}
-
-/// Enumerates all types of perfect hashing functions.
-#[derive(Debug, Serialize, Deserialize)]
-#[cfg_attr(test, derive(PartialEq))]
-pub enum Functions {
-    Fmph(Fmph),
-    GoFmph(GoFmph),
-}
-
-impl PerfectHashingFunction for Functions {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        match self {
-            Self::Fmph(f) => f.set_keys(keys),
-            Self::GoFmph(f) => f.set_keys(keys),
-        }
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        match self {
-            Self::Fmph(f) => f.get_index(key),
-            Self::GoFmph(f) => f.get_index(key),
-        }
-    }
-}