File size: 1,948 Bytes
84d2a97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use std::path::{Path, PathBuf};

use ahash::AHashMap;
use io::file_operations::{atomic_save_json, read_json};
use serde::{Deserialize, Serialize};
use sparse::common::sparse_vector::{RemappedSparseVector, SparseVector};
use sparse::common::types::{DimId, DimOffset};

use crate::common::operation_error::OperationResult;

/// Name of the JSON file that `IndicesTracker::file_path` appends to the given directory.
const INDICES_TRACKER_FILE_NAME: &str = "indices_tracker.json";

/// Tracks which dimension ids have been registered and the offset assigned to each.
///
/// The type is serializable with serde so it can be written to and read from JSON.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct IndicesTracker {
    /// Offset assigned to every registered dimension id.
    pub map: AHashMap<DimId, DimOffset>,
}

impl IndicesTracker {
    /// Loads a tracker from the `indices_tracker.json` file inside the directory `path`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `read_json`, converted into `std::io::Error`.
    pub fn open(path: &Path) -> std::io::Result<Self> {
        let path = Self::file_path(path);
        Ok(read_json(&path)?)
    }

    /// Writes the tracker to the `indices_tracker.json` file inside the directory `path`
    /// using `atomic_save_json`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `atomic_save_json`, converted into the
    /// `OperationResult` error type.
    pub fn save(&self, path: &Path) -> OperationResult<()> {
        let path = Self::file_path(path);
        Ok(atomic_save_json(&path, self)?)
    }

    /// Returns the location of the tracker file inside the directory `path`.
    pub fn file_path(path: &Path) -> PathBuf {
        path.join(INDICES_TRACKER_FILE_NAME)
    }

    /// Assigns an offset to every index of `vector` that is not registered yet.
    ///
    /// A new index receives the current number of map entries as its offset, so offsets
    /// are handed out in first-seen order. Indices that are already registered keep the
    /// offset they were given earlier.
    pub fn register_indices(&mut self, vector: &SparseVector) {
        for &index in &vector.indices {
            // Read the size before `entry` takes the mutable borrow of the map.
            // NOTE: `as` truncates silently if the map ever holds more entries than
            // `DimOffset` can represent.
            let next_offset = self.map.len() as DimOffset;
            // Single lookup: inserts only when the index is absent.
            self.map.entry(index).or_insert(next_offset);
        }
    }

    /// Returns the offset registered for `index`, or `None` if it was never registered.
    pub fn remap_index(&self, index: DimId) -> Option<DimOffset> {
        self.map.get(&index).copied()
    }

    /// Rewrites the indices of `vector` into their registered offsets.
    ///
    /// Every occurrence of an unregistered index is replaced by a fresh placeholder
    /// offset. Placeholders start at `self.map.len() + 1` and grow by one per
    /// occurrence, so they never equal an offset handed out by `register_indices`
    /// (those are all below `self.map.len()`). The values are carried over unchanged
    /// and `sort_by_indices` is called on the result before it is returned.
    pub fn remap_vector(&self, vector: SparseVector) -> RemappedSparseVector {
        let mut placeholder_indices = self.map.len() as DimOffset;
        let SparseVector {
            mut indices,
            values,
        } = vector;

        for index in &mut indices {
            *index = match self.remap_index(*index) {
                Some(offset) => offset,
                None => {
                    // Increment first: the first placeholder is `len + 1`, not `len`.
                    placeholder_indices += 1;
                    placeholder_indices
                }
            };
        }

        let mut remapped_vector = RemappedSparseVector { indices, values };
        remapped_vector.sort_by_indices();
        remapped_vector
    }
}