Spaces:

ccm
/

chat-with-idetc

Runtime error

App Files Files Community

ccm committed on Aug 7, 2024

Commit

03004bb

verified ·

1 Parent(s): c487a2a

Update main.py

Browse files

Files changed (1) hide show

main.py +22 -13

main.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import threading  # to allow streaming response
-import time  # to pave the deliver of the message
 import datasets  # for loading RAG database
 import faiss  # to create a search index
@@ -47,11 +47,11 @@ chat_model = transformers.AutoModelForCausalLM.from_pretrained(
 # Create a FAISS index for fast similarity search
 vectors = numpy.stack(data["embedding"].tolist(), axis=0).astype("float32")
-index = faiss.IndexFlatL2(len(data["embedding"][0]))
-index.metric_type = faiss.METRIC_INNER_PRODUCT
 faiss.normalize_L2(vectors)
-index.train(vectors)
-index.add(vectors)
 def preprocess(query: str, k: int) -> tuple[str, str]:
@@ -65,8 +65,8 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     """
     encoded_query = numpy.expand_dims(embedding_model.encode(query), axis=0)
     faiss.normalize_L2(encoded_query)
-    D, I = index.search(encoded_query, k)
-    top_five = data.loc[I[0]]
     print(top_five["text"].values)
@@ -86,7 +86,6 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
         title = top_five["title"].values[i]
         id = top_five["id"].values[i]
         url = "https://doi.org/10.1115/" + id
-        path = top_five["path"].values[i]
         text = top_five["text"].values[i]
         research_excerpts += (
@@ -104,16 +103,26 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     print(references)
-    return prompt, "\n\n### References\n\n" + "\n".join(
         [
-            str(i + 1)
             + ". "
-            + ref
-            + "\n  - ".join(["", *['"...' + x + '..."' for x in references[ref]]])
-            for i, ref in enumerate(references.keys())
         ]
     )
 def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     """

 import threading  # to allow streaming response
+import time  # to pace the delivery of the message
 import datasets  # for loading RAG database
 import faiss  # to create a search index
 # Create a FAISS index for fast similarity search
 vectors = numpy.stack(data["embedding"].tolist(), axis=0).astype("float32")
+excerpt_index = faiss.IndexFlatL2(len(data["embedding"][0]))
+excerpt_index.metric_type = faiss.METRIC_INNER_PRODUCT
 faiss.normalize_L2(vectors)
+excerpt_index.train(vectors)
+excerpt_index.add(vectors)
 def preprocess(query: str, k: int) -> tuple[str, str]:
     """
     encoded_query = numpy.expand_dims(embedding_model.encode(query), axis=0)
     faiss.normalize_L2(encoded_query)
+    _, indices = excerpt_index.search(encoded_query, k)
+    top_five = data.loc[indices[0]]
     print(top_five["text"].values)
         title = top_five["title"].values[i]
         id = top_five["id"].values[i]
         url = "https://doi.org/10.1115/" + id
         text = top_five["text"].values[i]
         research_excerpts += (
     print(references)
+    list_of_references = "\n".join(
         [
+            str(idx + 1)
             + ". "
+            + hyperlinked_title
+            + "\n\n> ".join(
+                [
+                    "",
+                    *[
+                        '"...' + excerpt + '..."'
+                        for excerpt in references[hyperlinked_title]
+                    ],
+                ]
+            )
+            for idx, hyperlinked_title in enumerate(references.keys())
         ]
     )
+    return prompt, "\n\n### References\n\n" + list_of_references
 def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     """