Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,8 @@ import arxiv
|
|
17 |
import scholarly
|
18 |
import pymed
|
19 |
import wikipedia
|
20 |
-
from newspaper import Article
|
|
|
21 |
import pickle
|
22 |
import faiss
|
23 |
import threading
|
@@ -33,9 +34,9 @@ if not HF_API_KEY:
|
|
33 |
|
34 |
client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
|
35 |
|
36 |
-
MAIN_LLM_MODEL = "
|
37 |
-
REASONING_LLM_MODEL = "
|
38 |
-
CRITIC_LLM_MODEL = "
|
39 |
ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
|
40 |
|
41 |
MAX_ITERATIONS = 20
|
@@ -158,7 +159,7 @@ def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str
|
|
158 |
|
159 |
results = [r for r in ddgs.text(**kwargs)]
|
160 |
if results:
|
161 |
-
return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
|
162 |
else:
|
163 |
if time_filter and "time" in kwargs:
|
164 |
del kwargs["time"]
|
@@ -465,9 +466,8 @@ def tool_identify_focus_areas(prompt: str, insights: list = [],
|
|
465 |
return []
|
466 |
|
467 |
def add_to_faiss_index(text: str):
|
468 |
-
"""Adds the embedding of the given text to the FAISS index."""
|
469 |
embedding = document_similarity_model.encode(text, convert_to_tensor=True)
|
470 |
-
embedding_np = embedding.cpu().numpy().reshape(1, -1)
|
471 |
if embedding_np.shape[1] != embedding_dim:
|
472 |
logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
|
473 |
return
|
@@ -475,12 +475,11 @@ def add_to_faiss_index(text: str):
|
|
475 |
index.add(embedding_np)
|
476 |
|
477 |
def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
|
478 |
-
"""Searches the FAISS index for the most similar texts to the query."""
|
479 |
query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
|
480 |
query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
|
481 |
faiss.normalize_L2(query_embedding_np)
|
482 |
distances, indices = index.search(query_embedding_np, top_k)
|
483 |
-
return indices[0].tolist()
|
484 |
|
485 |
def filter_results(search_results, prompt, previous_snippets=None):
|
486 |
if not main_similarity_model or not search_results:
|
@@ -507,7 +506,6 @@ def filter_results(search_results, prompt, previous_snippets=None):
|
|
507 |
result['relevance_score'] = cosine_score
|
508 |
filtered_results.append(result)
|
509 |
seen_snippets.add(result['snippet'])
|
510 |
-
# Add snippet to FAISS index
|
511 |
add_to_faiss_index(result['snippet'])
|
512 |
|
513 |
|
|
|
17 |
import scholarly
|
18 |
import pymed
|
19 |
import wikipedia
|
20 |
+
#from newspaper import Article # Removed direct import
|
21 |
+
from newspaper3k import Article # Import from newspaper3k
|
22 |
import pickle
|
23 |
import faiss
|
24 |
import threading
|
|
|
34 |
|
35 |
client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
|
36 |
|
37 |
+
MAIN_LLM_MODEL = "meta-llama/Llama-3-70b-instruct"
|
38 |
+
REASONING_LLM_MODEL = "anthropic/claude-3-opus-20240229"
|
39 |
+
CRITIC_LLM_MODEL = "google/gemini-1.5-pro"
|
40 |
ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
|
41 |
|
42 |
MAX_ITERATIONS = 20
|
|
|
159 |
|
160 |
results = [r for r in ddgs.text(**kwargs)]
|
161 |
if results:
|
162 |
+
return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
|
163 |
else:
|
164 |
if time_filter and "time" in kwargs:
|
165 |
del kwargs["time"]
|
|
|
466 |
return []
|
467 |
|
468 |
def add_to_faiss_index(text: str):
|
|
|
469 |
embedding = document_similarity_model.encode(text, convert_to_tensor=True)
|
470 |
+
embedding_np = embedding.cpu().numpy().reshape(1, -1)
|
471 |
if embedding_np.shape[1] != embedding_dim:
|
472 |
logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
|
473 |
return
|
|
|
475 |
index.add(embedding_np)
|
476 |
|
477 |
def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
|
|
|
478 |
query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
|
479 |
query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
|
480 |
faiss.normalize_L2(query_embedding_np)
|
481 |
distances, indices = index.search(query_embedding_np, top_k)
|
482 |
+
return indices[0].tolist()
|
483 |
|
484 |
def filter_results(search_results, prompt, previous_snippets=None):
|
485 |
if not main_similarity_model or not search_results:
|
|
|
506 |
result['relevance_score'] = cosine_score
|
507 |
filtered_results.append(result)
|
508 |
seen_snippets.add(result['snippet'])
|
|
|
509 |
add_to_faiss_index(result['snippet'])
|
510 |
|
511 |
|