milyiyo committed
Commit 0bcdbb7 · 1 Parent(s): 941bfc4

Store the extracted text to be used during faiss' index creation

Files changed (1):
functions.py +7 -10
functions.py CHANGED
@@ -18,13 +18,9 @@ device = 'cuda'
 shared = {
     'answer_context': None,
     'embeddings_dataset': None,
-    'base_text': None,
+    'full_text': None,
 }
 
-def store_text_changes(text):
-    shared['base_text'] = text
-
-
 def get_nearest_examples(question: str, k: int):
     print(['get_nearest_examples', 'start'])
     question_embedding = get_embeddings([question]).cpu().detach().numpy()
@@ -69,6 +65,7 @@ def extract_text(url: str):
     response = requests.get(url)
     soup = BeautifulSoup(response.text, "html.parser")
     text = '\n\n'.join(map(lambda p: p.text, soup.find_all('p')))
+    shared['full_text'] = text
     print(['extract_text', 'end'])
     return text
 
@@ -121,10 +118,10 @@ def get_answer_context():
 
 
 def answer_question(question: str):
-    return ', '.join([len(shared['base_text']), len(question)])
+    # return ', '.join([len(shared['base_text']), len(question)])
     print(['answer_question', 'start'])
     if not shared['embeddings_dataset']:
-        build_faiss_index(full_text)
+        build_faiss_index(shared['full_text'])
     top_k_samples = get_nearest_examples(question, k=5)
 
     context = '\n'.join(top_k_samples)
@@ -170,7 +167,7 @@ def load_embeddings_model():
     return model, tokenizer
 
 
-# model, tokenizer = load_model(
-#     "hackathon-somos-nlp-2023/opt-6.7b-lora-sag-t3000-v300-v2")
+model, tokenizer = load_model(
+    "hackathon-somos-nlp-2023/opt-6.7b-lora-sag-t3000-v300-v2")
 
-# emb_model, emb_tokenizer = load_embeddings_model()
+emb_model, emb_tokenizer = load_embeddings_model()
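
For context on how the pieces fit together after this change: extract_text now saves the scraped page into shared['full_text'], and the first call to answer_question passes that text to build_faiss_index before querying. The sketch below is a hypothetical implementation of build_faiss_index, not the repository's code; it assumes the get_embeddings helper and the shared dict from functions.py, and uses the Hugging Face datasets FAISS integration that the get_nearest_examples call implies.

# Hypothetical sketch only -- not the repository's build_faiss_index.
# Assumes the get_embeddings helper and the shared dict from functions.py.
from datasets import Dataset

def build_faiss_index(full_text: str):
    print(['build_faiss_index', 'start'])
    # extract_text joins <p> contents with '\n\n', so split back into paragraphs.
    paragraphs = [p for p in full_text.split('\n\n') if p.strip()]
    ds = Dataset.from_dict({'text': paragraphs})
    # Embed each paragraph with the same model used for the questions.
    ds = ds.map(lambda row: {
        'embeddings': get_embeddings([row['text']]).cpu().detach().numpy()[0]
    })
    # Index the embeddings column so get_nearest_examples can query it.
    ds.add_faiss_index(column='embeddings')
    shared['embeddings_dataset'] = ds
    print(['build_faiss_index', 'end'])

With the index cached in shared['embeddings_dataset'], later calls to answer_question skip the rebuild and go straight to get_nearest_examples.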