Store the extracted text to be used during FAISS index creation
functions.py  CHANGED  (+7 -10)
@@ -18,13 +18,9 @@ device = 'cuda'
 shared = {
     'answer_context': None,
     'embeddings_dataset': None,
-    'base_text': None,
+    'full_text': None,
 }
 
-def store_text_changes(text):
-    shared['base_text'] = text
-
-
 def get_nearest_examples(question: str, k: int):
     print(['get_nearest_examples', 'start'])
     question_embedding = get_embeddings([question]).cpu().detach().numpy()
@@ -69,6 +65,7 @@ def extract_text(url: str):
     response = requests.get(url)
     soup = BeautifulSoup(response.text, "html.parser")
     text = '\n\n'.join(map(lambda p: p.text, soup.find_all('p')))
+    shared['full_text'] = text
     print(['extract_text', 'end'])
     return text
 
@@ -121,10 +118,10 @@ def get_answer_context():
 
 
 def answer_question(question: str):
-    return ', '.join([len(shared['base_text']), len(question)])
+    # return ', '.join([len(shared['base_text']), len(question)])
     print(['answer_question', 'start'])
     if not shared['embeddings_dataset']:
-        build_faiss_index(full_text)
+        build_faiss_index(shared['full_text'])
     top_k_samples = get_nearest_examples(question, k=5)
 
     context = '\n'.join(top_k_samples)
@@ -170,7 +167,7 @@ def load_embeddings_model():
     return model, tokenizer
 
 
-
-
 
-
+model, tokenizer = load_model(
+    "hackathon-somos-nlp-2023/opt-6.7b-lora-sag-t3000-v300-v2")
+
+emb_model, emb_tokenizer = load_embeddings_model()