Spaces:

Shreyas094
/

SearXNG-AI-v2

Running

App Files Files Community

Shreyas094 committed on Nov 13, 2024

Commit

8552f58

verified ·

1 Parent(s): 4040242

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -30

app.py CHANGED Viewed

@@ -174,9 +174,23 @@ Guidelines:
         return f"I apologize, but I encountered an error while processing your query: {str(e)}"
 async def rephrase_query(chat_history, query, temperature=0.2) -> str:
-    """Rephrase the query based on chat history and context."""
     logger.info(f'Rephrasing query: {query}')
     try:
         # Format recent conversation history (last 3 turns for context)
         formatted_history = []
         for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
@@ -186,51 +200,47 @@ async def rephrase_query(chat_history, query, temperature=0.2) -> str:
                 formatted_history.append(f"Assistant: {assistant_msg}")
         chat_context = "\n".join(formatted_history)
-        current_year = datetime.now().year
         system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
 Key Rules:
 1. For follow-up questions or queries referencing previous conversation:
-   - Extract the main topic/subject from previous messages
-   - Combine previous context with the current query
-   - Example:
-     Previous: "What is the structure of German banking industry?"
-     Current: "can you do more latest web search on my previous query"
-     Should become: "Latest structure and developments in German banking industry after: 2024"
 2. Entity Handling:
-   - Identify and preserve main entities from context
-   - Enclose ONLY entity names in double quotes
-   - Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
 3. Date and Time Context:
-   - For queries about current/latest information:
-     * Keep time-related words (latest, current, recent, now)
-     * ALWAYS append "after: YYYY" (current year)
-   - For specific time periods:
-     * Preserve the original time reference
-     * Add appropriate "after: YYYY" based on context
-   - For queries without time reference:
-     * Add "after: YYYY" if about current state/status
 4. Query Formatting:
-   - Capitalize first letter
-   - No period at end
-   - Include all relevant context
-   - Maintain clear and searchable structure
-Remember: Your goal is to create a complete, self-contained query that includes all necessary context from the conversation history."""
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": f"""Current year: {current_year}
 Recent conversation history:
 {chat_context}
-Current query: {query}
 Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
         ]
@@ -243,6 +253,11 @@ Please rephrase this query into a complete, contextual search query following th
         )
         rephrased_query = response.choices[0].message.content.strip()
         logger.info(f'Query rephrased to: {rephrased_query}')
         return rephrased_query

         return f"I apologize, but I encountered an error while processing your query: {str(e)}"
 async def rephrase_query(chat_history, query, temperature=0.2) -> str:
+    """Rephrase the query based on chat history and context while preserving URLs."""
     logger.info(f'Rephrasing query: {query}')
     try:
+        # Extract URLs from the query
+        url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
+        urls = re.findall(url_pattern, query)
+        # If URLs are found, store them and replace with placeholders
+        url_placeholders = {}
+        modified_query = query
+        if urls:
+            for idx, url in enumerate(urls):
+                placeholder = f"__URL_{idx}__"
+                url_placeholders[placeholder] = url
+                modified_query = modified_query.replace(url, placeholder)
         # Format recent conversation history (last 3 turns for context)
         formatted_history = []
         for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
                 formatted_history.append(f"Assistant: {assistant_msg}")
         chat_context = "\n".join(formatted_history)
+        current_year = datetime.now().year
         system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
 Key Rules:
 1. For follow-up questions or queries referencing previous conversation:
+ - Extract the main topic/subject from previous messages
+ - Combine previous context with the current query
+ - Example: Previous: "What is the structure of German banking industry?"
+   Current: "can you do more latest web search on my previous query"
+   Should become: "Latest structure and developments in German banking industry after: 2024"
 2. Entity Handling:
+ - Identify and preserve main entities from context
+ - Enclose ONLY entity names in double quotes
+ - Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
+ - Preserve URL placeholders exactly as they appear (marked with __URL_N__)
 3. Date and Time Context:
+ - For queries about current/latest information:
+   * Keep time-related words (latest, current, recent, now)
+   * ALWAYS append "after: YYYY" (current year)
+ - For specific time periods:
+   * Preserve the original time reference
+   * Add appropriate "after: YYYY" based on context
+ - For queries without time reference:
+   * Add "after: YYYY" if about current state/status
 4. Query Formatting:
+ - Capitalize first letter
+ - No period at end
+ - Include all relevant context
+ - Maintain clear and searchable structure
+ - IMPORTANT: Keep URL placeholders (__URL_N__) exactly as they appear"""
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": f"""Current year: {current_year}
 Recent conversation history:
 {chat_context}
+Current query: {modified_query}
 Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
         ]
         )
         rephrased_query = response.choices[0].message.content.strip()
+        # Replace placeholders with original URLs
+        for placeholder, url in url_placeholders.items():
+            rephrased_query = rephrased_query.replace(placeholder, url)
         logger.info(f'Query rephrased to: {rephrased_query}')
         return rephrased_query