Shreyas094 committed
Commit a2c11b9
1 Parent(s): 05d00f6

Update app.py

Files changed (1): app.py +191 -49

app.py CHANGED
@@ -71,12 +71,12 @@ def fetch_custom_models():
     if not CUSTOM_LLM:
         return []
     try:
-        response = requests.get(f"{CUSTOM_LLM}/v1/models")
+        response = requests.get(f"{CUSTOM_LLM}/api/tags")  # Ollama endpoint for listing models
         response.raise_for_status()
-        models = response.json().get("data", [])
-        return [model["id"] for model in models]
+        models = response.json().get("models", [])
+        return [model["name"] for model in models]  # Ollama returns model names directly
     except Exception as e:
-        logger.error(f"Error fetching custom models: {e}")
+        logger.error(f"Error fetching Ollama models: {e}")
         return []

 # Fetch custom models and determine the default model
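For context, Ollama's /api/tags endpoint returns a JSON object whose "models" array carries one entry per locally pulled model, which is what the new branch parses. A minimal standalone sketch of the same call, assuming a local Ollama server on its default port; the timeout is an addition, not part of the commit:

    import requests

    def list_ollama_models(base_url: str = "http://localhost:11434") -> list:
        # /api/tags responds with {"models": [{"name": "llama3:latest", ...}, ...]}
        response = requests.get(f"{base_url}/api/tags", timeout=10)
        response.raise_for_status()
        return [model["name"] for model in response.json().get("models", [])]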
@@ -84,7 +84,7 @@ custom_models = fetch_custom_models()
 all_models = ["huggingface", "groq", "mistral"] + custom_models

 # Determine the default model
-default_model = CUSTOM_LLM_DEFAULT_MODEL if CUSTOM_LLM_DEFAULT_MODEL in all_models else "huggingface"
+default_model = CUSTOM_LLM_DEFAULT_MODEL if CUSTOM_LLM_DEFAULT_MODEL in all_models else "groq"

 logger.info(f"Default model selected: {default_model}")
@@ -157,23 +157,44 @@ class MistralModel(AIModel):
 class CustomModel(AIModel):
     def __init__(self, model_name):
         self.model_name = model_name
+        self.base_url = os.getenv("CUSTOM_LLM", "http://localhost:11434")

     def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
         try:
+            # Convert messages to Ollama format
+            prompt = "\n".join([
+                f"{msg['role'].capitalize()}: {msg['content']}"
+                for msg in messages
+            ])
+
             response = requests.post(
-                f"{CUSTOM_LLM}/v1/chat/completions",
+                f"{self.base_url}/api/generate",  # Ollama endpoint
                 json={
                     "model": self.model_name,
-                    "messages": messages,
-                    "max_tokens": max_tokens,
-                    "temperature": temperature
+                    "prompt": prompt,
+                    "options": {
+                        "num_predict": max_tokens,
+                        "temperature": temperature
+                    }
                 }
             )
             response.raise_for_status()
+
+            # Handle Ollama's streaming response
+            full_response = ""
+            for line in response.iter_lines():
+                if line:
+                    chunk = json.loads(line)
+                    if 'response' in chunk:
+                        full_response += chunk['response']
+                    if chunk.get('done', False):
+                        break
+
+            return full_response.strip()
+
         except Exception as e:
-            logger.error(f"Error generating response from custom model: {e}")
-            return "Error: Unable to generate response from custom model."
+            logger.error(f"Error generating response from Ollama model: {e}")
+            return f"Error: Unable to generate response from Ollama model. {str(e)}"

 class AIModelFactory:
     @staticmethod
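Two notes on the new generate_response. First, because requests.post is called without stream=True, the whole NDJSON body is buffered before iter_lines() runs; the loop still works, it just isn't incremental. Second, Ollama can be asked for a single JSON object instead, which removes the line-by-line parsing entirely. A sketch of that non-streaming variant (endpoint and default port per Ollama's API; the model name is a placeholder):

    import requests

    def generate_once(prompt: str, model: str = "llama3",
                      base_url: str = "http://localhost:11434") -> str:
        response = requests.post(
            f"{base_url}/api/generate",
            json={"model": model, "prompt": prompt, "stream": False},
            timeout=120,
        )
        response.raise_for_status()
        # With "stream": False the body is one JSON object; "response" holds the text
        return response.json()["response"].strip()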
@@ -357,7 +378,7 @@ def scrape_with_newspaper(url):
         logger.error(f"Error scraping {url} with Newspaper3k: {e}")
         return ""

-def rephrase_query(chat_history, query, temperature=0.2):
+def rephrase_query(chat_history, query, model, temperature=0.2) -> str:
     system_prompt = """You are a highly intelligent and context-aware conversational assistant. Your tasks are as follows:

 1. Determine if the new query is a continuation of the previous conversation or an entirely new topic.
@@ -425,14 +446,49 @@ Rephrased query:"""
     ]

     try:
-        logger.info(f"Sending rephrasing request to LLM with temperature {temperature}")
-        response = client.chat_completion(
-            messages=messages,
-            max_tokens=150,
-            temperature=temperature
-        )
-        logger.info("Received rephrased query from LLM")
-        rephrased_question = response.choices[0].message.content.strip()
+        logger.info(f"Sending rephrasing request to {model} with temperature {temperature}")
+
+        if model == "groq":
+            response = groq_client.chat.completions.create(
+                messages=messages,
+                model="llama-3.1-70b-versatile",
+                max_tokens=150,
+                temperature=temperature,
+                top_p=0.9,
+                presence_penalty=1.2,
+                stream=False
+            )
+            rephrased_question = response.choices[0].message.content.strip()
+
+        elif model == "mistral":
+            response = mistral_client.chat.complete(
+                model="open-mistral-nemo",
+                messages=messages,
+                max_tokens=150,
+                temperature=temperature,
+                top_p=0.9,
+                stream=False
+            )
+            rephrased_question = response.choices[0].message.content.strip()
+
+        elif CUSTOM_LLM and model in fetch_custom_models():
+            # Create CustomModel instance for Ollama
+            custom_model = CustomModel(model)
+            rephrased_question = custom_model.generate_response(
+                messages=messages,
+                max_tokens=150,
+                temperature=temperature
+            )
+
+        else:  # huggingface
+            response = client.chat_completion(
+                messages=messages,
+                max_tokens=150,
+                temperature=temperature,
+                frequency_penalty=1.4,
+                top_p=0.9
+            )
+            rephrased_question = response.choices[0].message.content.strip()

         # Remove surrounding quotes if present
         if (rephrased_question.startswith('"') and rephrased_question.endswith('"')) or \
@@ -441,8 +497,9 @@ Rephrased query:"""

         logger.info(f"Rephrased Query (cleaned): {rephrased_question}")
         return rephrased_question
+
     except Exception as e:
-        logger.error(f"Error rephrasing query with LLM: {e}")
+        logger.error(f"Error rephrasing query with {model} LLM: {e}")
         return query  # Fallback to original query if rephrasing fails

 class BM25:
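The same four-way provider branch (groq / mistral / Ollama / huggingface) now appears in rephrase_query, in assess_relevance_and_summarize below, and in the answer-generation path. A dispatch-table sketch of one way it could be centralized; the three wrapper functions are hypothetical stand-ins, not code from the commit:

    from typing import Callable, Dict, List

    Message = Dict[str, str]

    def _groq(messages: List[Message], max_tokens: int, temperature: float) -> str:
        raise NotImplementedError("wrap groq_client.chat.completions.create here")

    def _mistral(messages: List[Message], max_tokens: int, temperature: float) -> str:
        raise NotImplementedError("wrap mistral_client.chat.complete here")

    def _huggingface(messages: List[Message], max_tokens: int, temperature: float) -> str:
        raise NotImplementedError("wrap client.chat_completion here")

    PROVIDERS: Dict[str, Callable[[List[Message], int, float], str]] = {
        "groq": _groq,
        "mistral": _mistral,
        "huggingface": _huggingface,
    }

    def complete(model: str, messages: List[Message],
                 max_tokens: int = 150, temperature: float = 0.2) -> str:
        handler = PROVIDERS.get(model)
        if handler is None:
            # Mirror the `CUSTOM_LLM and model in fetch_custom_models()` branch:
            # any unrecognized name falls through to the Ollama-backed CustomModel
            raise NotImplementedError("wrap CustomModel(model).generate_response here")
        return handler(messages, max_tokens, temperature)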
@@ -528,13 +585,33 @@ def prepare_documents_for_bm25(documents: List[Dict]) -> Tuple[List[str], List[Dict]]:
         Tuple of (document texts, original documents)
     """
     doc_texts = []
+    valid_documents = []
+
     for doc in documents:
-        # Combine title and content for better matching
-        doc_text = f"{doc['title']} {doc['content']}"
-        doc_texts.append(doc_text)
-    return doc_texts, documents
+        try:
+            # Get title and content with default empty strings if missing
+            title = doc.get('title', '')
+            content = doc.get('content', '')
+
+            # Skip documents with no title or content
+            if not (title.strip() or content.strip()):
+                logger.warning(f"Skipping document with no title or content: {doc}")
+                continue
+
+            # Combine title and content for better matching
+            doc_text = f"{title} {content}".strip()
+            doc_texts.append(doc_text)
+            valid_documents.append(doc)
+
+        except Exception as e:
+            logger.warning(f"Error processing document {doc}: {e}")
+            continue
+
+    if not valid_documents:
+        raise ValueError("No valid documents found with required fields")
+
+    return doc_texts, valid_documents

-# Now modify the rerank_documents_with_priority function to include BM25 ranking
 def rerank_documents(query: str, documents: List[Dict],
                      similarity_threshold: float = 0.95, max_results: int = 5) -> List[Dict]:
     try:
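A quick usage sketch for the hardened prepare_documents_for_bm25, assuming the function and a module-level logger are in scope; the sample documents are illustrative:

    docs = [
        {"title": "Fed holds rates", "content": "The central bank kept rates steady..."},
        {"title": " ", "content": ""},           # skipped: no usable title or content
        {"content": "Untitled wire story..."},   # kept: title defaults to ""
    ]
    doc_texts, valid_docs = prepare_documents_for_bm25(docs)
    assert len(doc_texts) == len(valid_docs) == 2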
@@ -542,9 +619,24 @@ def rerank_documents(query: str, documents: List[Dict],
             logger.warning("No documents to rerank.")
             return documents

+        # Validate input documents
+        if not all(isinstance(doc, dict) for doc in documents):
+            raise ValueError("All documents must be dictionaries")
+
         # Step 1: Prepare documents for BM25
-        doc_texts, original_docs = prepare_documents_for_bm25(documents)
+        doc_texts, valid_docs = prepare_documents_for_bm25(documents)
+
+        if not valid_docs:
+            logger.warning("No valid documents after preparation.")
+            return documents[:max_results]
+
+        # Keep only documents with summaries for semantic scoring, filtering
+        # doc_texts in step so BM25 and semantic scores stay aligned
+        pairs = [(text, doc) for text, doc in zip(doc_texts, valid_docs)
+                 if doc.get('summary') and doc['summary'].strip()]
+
+        if not pairs:
+            logger.warning("No documents with valid summaries found.")
+            return documents[:max_results]
+
+        doc_texts = [text for text, _ in pairs]
+        valid_docs = [doc for _, doc in pairs]
+
         # Step 2: Initialize and fit BM25
         bm25 = BM25()
         bm25.fit(doc_texts)
@@ -554,19 +646,26 @@ def rerank_documents(query: str, documents: List[Dict],

         # Step 4: Get semantic similarity scores
         query_embedding = similarity_model.encode(query, convert_to_tensor=True)
-        doc_summaries = [doc['summary'] for doc in documents]
+        doc_summaries = [doc['summary'] for doc in valid_docs]
         doc_embeddings = similarity_model.encode(doc_summaries, convert_to_tensor=True)
         semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]

         # Step 5: Combine scores (normalize first)
-        bm25_scores_norm = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores))
-        semantic_scores_norm = (semantic_scores - torch.min(semantic_scores)) / (torch.max(semantic_scores) - torch.min(semantic_scores))
+        if len(bm25_scores) > 1:
+            bm25_scores_norm = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores))
+        else:
+            bm25_scores_norm = bm25_scores
+
+        if len(semantic_scores) > 1:
+            semantic_scores_norm = (semantic_scores - torch.min(semantic_scores)) / (torch.max(semantic_scores) - torch.min(semantic_scores))
+        else:
+            semantic_scores_norm = semantic_scores

         # Combine scores with weights (0.4 for BM25, 0.6 for semantic similarity)
         combined_scores = 0.4 * bm25_scores_norm + 0.6 * semantic_scores_norm.numpy()

         # Create scored documents with combined scores
-        scored_documents = list(zip(documents, combined_scores))
+        scored_documents = list(zip(valid_docs, combined_scores))

         # Sort by combined score (descending)
         scored_documents.sort(key=lambda x: x[1], reverse=True)
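The len(...) > 1 guards only protect the single-document case; if a longer batch happens to produce identical scores, max - min is still zero and the division yields NaNs. A range-aware helper closes that gap (a sketch, not part of the commit; shown for NumPy, so the torch scores would be converted with .cpu().numpy() first):

    import numpy as np

    def minmax_norm(scores: np.ndarray, eps: float = 1e-9) -> np.ndarray:
        # Map scores to [0, 1]; with a (near-)zero range every document ties,
        # so return a constant vector instead of dividing by zero
        lo, hi = float(np.min(scores)), float(np.max(scores))
        if hi - lo < eps:
            return np.ones_like(scores, dtype=float)
        return (scores - lo) / (hi - lo)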
@@ -621,7 +720,7 @@ def is_content_unique(new_content, existing_contents, similarity_threshold=0.8):
         return False
     return True

-def assess_relevance_and_summarize(llm_client, query, document, temperature=0.2):
+def assess_relevance_and_summarize(llm_client, query, document, model, temperature=0.2) -> str:
     system_prompt = """You are a world-class AI assistant specializing in news analysis and document summarization. Your task is to provide a comprehensive and detailed summary of the given document that captures its key points and relevance to the user's query."""

     user_prompt = f"""
@@ -658,23 +757,57 @@ Remember to:
     ]

     try:
-        response = llm_client.chat_completion(
-            messages=messages,
-            max_tokens=300,
-            temperature=temperature,
-            top_p=0.9,
-            frequency_penalty=1.4
-        )
-        summary = response.choices[0].message.content.strip()
+        if model == "groq":
+            response = groq_client.chat.completions.create(
+                messages=messages,
+                model="llama-3.1-70b-versatile",
+                max_tokens=500,
+                temperature=temperature,
+                top_p=0.9,
+                presence_penalty=1.2,
+                stream=False
+            )
+            summary = response.choices[0].message.content.strip()
+
+        elif model == "mistral":
+            response = mistral_client.chat.complete(
+                model="open-mistral-nemo",
+                messages=messages,
+                max_tokens=500,
+                temperature=temperature,
+                top_p=0.9,
+                stream=False
+            )
+            summary = response.choices[0].message.content.strip()
+
+        elif CUSTOM_LLM and model in fetch_custom_models():
+            # Create CustomModel instance for Ollama
+            custom_model = CustomModel(model)
+            summary = custom_model.generate_response(
+                messages=messages,
+                max_tokens=500,
+                temperature=temperature
+            )
+
+        else:  # huggingface
+            response = client.chat_completion(
+                messages=messages,
+                max_tokens=500,
+                temperature=temperature,
+                frequency_penalty=1.4,
+                top_p=0.9
+            )
+            summary = response.choices[0].message.content.strip()

-        # If the summary starts with "Summary: ", remove it
+        # Clean up the summary if needed
         if summary.startswith("Summary: "):
             summary = summary[9:].strip()

-        # Always return format as if document was relevant
         return f"Relevant: Yes\nSummary: {summary}"
+
     except Exception as e:
-        logger.error(f"Error summarizing with LLM: {e}")
+        error_msg = f"Error summarizing with {model} LLM: {str(e)}"
+        logger.error(error_msg)
         return f"Relevant: Yes\nSummary: Error occurred while summarizing the document: {str(e)}"

 def scrape_full_content(url, max_chars=3000, timeout=5, use_pydf2=True):
@@ -747,6 +880,15 @@ Instructions:
             stream=False
         )
         return response.choices[0].message.content.strip()
+    elif CUSTOM_LLM and model in fetch_custom_models():
+        # Create CustomModel instance for Ollama
+        custom_model = CustomModel(model)
+        response = custom_model.generate_response(
+            messages=messages,
+            max_tokens=5000,
+            temperature=temperature
+        )
+        return response
     else:  # huggingface
         response = client.chat_completion(
             messages=messages,
@@ -779,7 +921,7 @@ def search_and_scrape(
 ):
     try:
         # Step 1: Rephrase the Query
-        rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
+        rephrased_query = rephrase_query(chat_history, query, model, temperature=llm_temperature)
         logger.info(f"Rephrased Query: {rephrased_query}")

         if not rephrased_query or rephrased_query.lower() == "not_needed":
@@ -896,7 +1038,7 @@ def search_and_scrape(
         relevant_documents = []
         unique_summaries = []
         for doc in scraped_content:
-            assessment = assess_relevance_and_summarize(client, rephrased_query, doc, temperature=llm_temperature)
+            assessment = assess_relevance_and_summarize(client, rephrased_query, doc, model, temperature=llm_temperature)
             relevance, summary = assessment.split('\n', 1)

             if relevance.strip().lower() == "relevant: yes":
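The split('\n', 1) here leans on assess_relevance_and_summarize always returning two newline-separated parts, which its error path above guarantees. A defensive parser would also tolerate a malformed reply (a sketch, not in the commit):

    def parse_assessment(assessment: str):
        # Expected shape: "Relevant: Yes\nSummary: <text>"
        relevance, _, summary = assessment.partition("\n")
        is_relevant = relevance.strip().lower() == "relevant: yes"
        return is_relevant, summary.removeprefix("Summary:").strip()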
@@ -1011,8 +1153,8 @@ iface = gr.ChatInterface(
         gr.Checkbox(label="Only do web search", value=True),  # Add this line
         gr.Slider(5, 20, value=3, step=1, label="Number of initial results"),
         gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
-        gr.Dropdown(["", "day", "week", "month", "year"], value="", label="Time Range"),
-        gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="", label="Language"),
+        gr.Dropdown(["", "day", "week", "month", "year"], value="week", label="Time Range"),
+        gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
         gr.Dropdown(["", "general", "news", "images", "videos", "music", "files", "it", "science", "social media"], value="general", label="Category"),
         gr.Dropdown(
             ["google", "bing", "duckduckgo", "baidu", "yahoo", "qwant", "startpage"],
@@ -1040,4 +1182,4 @@ iface = gr.ChatInterface(

 if __name__ == "__main__":
     logger.info("Starting the SearXNG Scraper for News using ChatInterface with Advanced Parameters")
-    iface.launch(share=True)
+    iface.launch(server_name="0.0.0.0", server_port=7862, share=False)
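Hard-coding server_name="0.0.0.0" exposes the app on every interface and pins the port. An env-driven variant is sketched below; the defaults mirror the commit, and Gradio itself also honors the GRADIO_SERVER_NAME / GRADIO_SERVER_PORT environment variables:

    import os

    iface.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7862")),
        share=os.getenv("GRADIO_SHARE", "false").lower() == "true",
    )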
 