Reality123b committed on
Commit
8015a80
·
verified ·
1 Parent(s): 8357215

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -17,7 +17,8 @@ import arxiv
17
  import scholarly
18
  import pymed
19
  import wikipedia
20
- from newspaper import Article
 
21
  import pickle
22
  import faiss
23
  import threading
@@ -33,9 +34,9 @@ if not HF_API_KEY:
33
 
34
  client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
35
 
36
- MAIN_LLM_MODEL = "mistralai/Mistral-Nemo-Instruct-2407"
37
- REASONING_LLM_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
38
- CRITIC_LLM_MODEL = "Qwen/QwQ-32B-Preview"
39
  ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
40
 
41
  MAX_ITERATIONS = 20
@@ -158,7 +159,7 @@ def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str
158
 
159
  results = [r for r in ddgs.text(**kwargs)]
160
  if results:
161
- return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
162
  else:
163
  if time_filter and "time" in kwargs:
164
  del kwargs["time"]
@@ -465,9 +466,8 @@ def tool_identify_focus_areas(prompt: str, insights: list = [],
465
  return []
466
 
467
  def add_to_faiss_index(text: str):
468
- """Adds the embedding of the given text to the FAISS index."""
469
  embedding = document_similarity_model.encode(text, convert_to_tensor=True)
470
- embedding_np = embedding.cpu().numpy().reshape(1, -1) # Ensure 2D array
471
  if embedding_np.shape[1] != embedding_dim:
472
  logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
473
  return
@@ -475,12 +475,11 @@ def add_to_faiss_index(text: str):
475
  index.add(embedding_np)
476
 
477
  def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
478
- """Searches the FAISS index for the most similar texts to the query."""
479
  query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
480
  query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
481
  faiss.normalize_L2(query_embedding_np)
482
  distances, indices = index.search(query_embedding_np, top_k)
483
- return indices[0].tolist() # Return indices
484
 
485
  def filter_results(search_results, prompt, previous_snippets=None):
486
  if not main_similarity_model or not search_results:
@@ -507,7 +506,6 @@ def filter_results(search_results, prompt, previous_snippets=None):
507
  result['relevance_score'] = cosine_score
508
  filtered_results.append(result)
509
  seen_snippets.add(result['snippet'])
510
- # Add snippet to FAISS index
511
  add_to_faiss_index(result['snippet'])
512
 
513
 
 
17
  import scholarly
18
  import pymed
19
  import wikipedia
20
+ #from newspaper import Article # Removed direct import
21
+ from newspaper3k import Article # Import from newspaper3k
22
  import pickle
23
  import faiss
24
  import threading
 
34
 
35
  client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
36
 
37
+ MAIN_LLM_MODEL = "meta-llama/Llama-3-70b-instruct"
38
+ REASONING_LLM_MODEL = "anthropic/claude-3-opus-20240229"
39
+ CRITIC_LLM_MODEL = "google/gemini-1.5-pro"
40
  ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
41
 
42
  MAX_ITERATIONS = 20
 
159
 
160
  results = [r for r in ddgs.text(**kwargs)]
161
  if results:
162
+ return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
163
  else:
164
  if time_filter and "time" in kwargs:
165
  del kwargs["time"]
 
466
  return []
467
 
468
  def add_to_faiss_index(text: str):
 
469
  embedding = document_similarity_model.encode(text, convert_to_tensor=True)
470
+ embedding_np = embedding.cpu().numpy().reshape(1, -1)
471
  if embedding_np.shape[1] != embedding_dim:
472
  logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
473
  return
 
475
  index.add(embedding_np)
476
 
477
  def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
 
478
  query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
479
  query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
480
  faiss.normalize_L2(query_embedding_np)
481
  distances, indices = index.search(query_embedding_np, top_k)
482
+ return indices[0].tolist()
483
 
484
  def filter_results(search_results, prompt, previous_snippets=None):
485
  if not main_similarity_model or not search_results:
 
506
  result['relevance_score'] = cosine_score
507
  filtered_results.append(result)
508
  seen_snippets.add(result['snippet'])
 
509
  add_to_faiss_index(result['snippet'])
510
 
511