colonelwatch committed on
Commit
4751a57
·
1 Parent(s): a8b8684

Handle new params.json format, including truncation and normalization

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -23,6 +23,12 @@ class IndexParameters(TypedDict):
23
  param_string: str # pass directly to faiss index
24
 
25
 
 
 
 
 
 
 
26
  @dataclass
27
  class Work:
28
  title: str | None
@@ -108,8 +114,16 @@ def get_env_var[T, U](
108
  return var
109
 
110
 
111
- def get_model(model_name: str, trust_remote_code: bool) -> SentenceTransformer:
112
- return SentenceTransformer(model_name, trust_remote_code=trust_remote_code)
 
 
 
 
 
 
 
 
113
 
114
 
115
  def get_index(dir: Path, search_time_s: float) -> Dataset:
@@ -118,14 +132,14 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
118
  faiss_index: faiss.Index = index.get_index("embeddings").faiss_index # type: ignore
119
 
120
  with open(dir / "params.json", "r") as f:
121
- params: list[IndexParameters] = json.load(f)
122
- params = [p for p in params if p["exec_time"] < search_time_s]
123
- param = max(params, key=(lambda p: p["recall"]))
124
- param_string = param["param_string"]
125
 
126
  ps = faiss.ParameterSpace()
127
  ps.initialize(faiss_index)
128
- ps.set_index_parameters(faiss_index, param_string)
129
 
130
  return index
131
 
@@ -218,9 +232,10 @@ def main():
218
  k = get_env_var("K", int, default=20) # TODO: can't go higher than 20 yet
219
  mailto = get_env_var("MAILTO", str, None)
220
 
221
- model = get_model(model_name, trust_remote_code)
222
  index = get_index(dir, search_time_s)
223
 
 
224
  if torch.cuda.is_available():
225
  model = model.half().cuda() if fp16 else model.bfloat16().cuda()
226
  # TODO: if huggingface datasets exposes an fp16 gpu option, use it here
@@ -230,7 +245,9 @@ def main():
230
 
231
  # function signature: (expanded tuple of input batches) -> tuple of output batches
232
  def search(query: list[str]) -> tuple[list[str]]:
233
- query_embedding = model.encode(query, prompt_name)
 
 
234
  distances, faiss_ids = index.search_batch("embeddings", query_embedding, k)
235
 
236
  faiss_ids_flat = list(chain(*faiss_ids))
 
23
  param_string: str # pass directly to faiss index
24
 
25
 
26
class Params(TypedDict):
    """Parsed contents of the index's params.json file."""

    # Embedding truncation size handed to SentenceTransformer as
    # truncate_dim; None means the model's full output dimension is kept.
    dimensions: int | None
    # Whether query embeddings must be L2-normalized before searching.
    normalize: bool
    # Candidate FAISS runtime settings, each with a measured
    # exec_time/recall trade-off and a param_string to apply.
    optimal_params: list[IndexParameters]
30
+
31
+
32
  @dataclass
33
  class Work:
34
  title: str | None
 
114
  return var
115
 
116
 
117
def get_model(
    model_name: str, params_dir: Path, trust_remote_code: bool
) -> tuple[bool, SentenceTransformer]:
    """Load the embedding model as configured by params.json.

    Args:
        model_name: Model id or local path passed to SentenceTransformer.
        params_dir: Directory containing params.json, whose "dimensions"
            and "normalize" keys configure the model.
        trust_remote_code: Forwarded verbatim to SentenceTransformer.

    Returns:
        A ``(normalize, model)`` pair: whether query embeddings should be
        normalized at encode time, and the model with its output truncated
        to the configured dimension (no truncation when "dimensions" is
        null in the JSON).
    """
    # JSON is UTF-8 by spec; pin the encoding so the read does not depend
    # on the platform's locale default.
    with open(params_dir / "params.json", "r", encoding="utf-8") as f:
        params: Params = json.load(f)
    model = SentenceTransformer(
        model_name,
        trust_remote_code=trust_remote_code,
        # truncate_dim=None leaves the embedding dimension unchanged.
        truncate_dim=params["dimensions"],
    )
    return params["normalize"], model
127
 
128
 
129
  def get_index(dir: Path, search_time_s: float) -> Dataset:
 
132
  faiss_index: faiss.Index = index.get_index("embeddings").faiss_index # type: ignore
133
 
134
  with open(dir / "params.json", "r") as f:
135
+ params: Params = json.load(f)
136
+ under = [p for p in params["optimal_params"] if p["exec_time"] < search_time_s]
137
+ optimal = max(under, key=(lambda p: p["recall"]))
138
+ optimal_string = optimal["param_string"]
139
 
140
  ps = faiss.ParameterSpace()
141
  ps.initialize(faiss_index)
142
+ ps.set_index_parameters(faiss_index, optimal_string)
143
 
144
  return index
145
 
 
232
  k = get_env_var("K", int, default=20) # TODO: can't go higher than 20 yet
233
  mailto = get_env_var("MAILTO", str, None)
234
 
235
+ normalize, model = get_model(model_name, dir, trust_remote_code)
236
  index = get_index(dir, search_time_s)
237
 
238
+ model.eval()
239
  if torch.cuda.is_available():
240
  model = model.half().cuda() if fp16 else model.bfloat16().cuda()
241
  # TODO: if huggingface datasets exposes an fp16 gpu option, use it here
 
245
 
246
  # function signature: (expanded tuple of input batches) -> tuple of output batches
247
  def search(query: list[str]) -> tuple[list[str]]:
248
+ query_embedding = model.encode(
249
+ query, prompt_name, normalize_embeddings=normalize
250
+ )
251
  distances, faiss_ids = index.search_batch("embeddings", query_embedding, k)
252
 
253
  faiss_ids_flat = list(chain(*faiss_ids))