Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -174,9 +174,23 @@ Guidelines:
|
|
174 |
return f"I apologize, but I encountered an error while processing your query: {str(e)}"
|
175 |
|
176 |
async def rephrase_query(chat_history, query, temperature=0.2) -> str:
|
177 |
-
"""Rephrase the query based on chat history and context."""
|
178 |
logger.info(f'Rephrasing query: {query}')
|
|
|
179 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
# Format recent conversation history (last 3 turns for context)
|
181 |
formatted_history = []
|
182 |
for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
|
@@ -186,51 +200,47 @@ async def rephrase_query(chat_history, query, temperature=0.2) -> str:
|
|
186 |
formatted_history.append(f"Assistant: {assistant_msg}")
|
187 |
|
188 |
chat_context = "\n".join(formatted_history)
|
189 |
-
current_year = datetime.now().year
|
190 |
-
|
191 |
system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
|
192 |
|
193 |
Key Rules:
|
194 |
1. For follow-up questions or queries referencing previous conversation:
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
Should become: "Latest structure and developments in German banking industry after: 2024"
|
201 |
|
202 |
2. Entity Handling:
|
203 |
-
|
204 |
-
|
205 |
-
|
|
|
206 |
|
207 |
3. Date and Time Context:
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
|
217 |
4. Query Formatting:
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
Remember: Your goal is to create a complete, self-contained query that includes all necessary context from the conversation history."""
|
224 |
|
225 |
messages = [
|
226 |
{"role": "system", "content": system_prompt},
|
227 |
{"role": "user", "content": f"""Current year: {current_year}
|
228 |
-
|
229 |
Recent conversation history:
|
230 |
{chat_context}
|
231 |
-
|
232 |
-
Current query: {query}
|
233 |
-
|
234 |
Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
|
235 |
]
|
236 |
|
@@ -243,6 +253,11 @@ Please rephrase this query into a complete, contextual search query following th
|
|
243 |
)
|
244 |
|
245 |
rephrased_query = response.choices[0].message.content.strip()
|
|
|
|
|
|
|
|
|
|
|
246 |
logger.info(f'Query rephrased to: {rephrased_query}')
|
247 |
return rephrased_query
|
248 |
|
|
|
174 |
return f"I apologize, but I encountered an error while processing your query: {str(e)}"
|
175 |
|
176 |
async def rephrase_query(chat_history, query, temperature=0.2) -> str:
|
177 |
+
"""Rephrase the query based on chat history and context while preserving URLs."""
|
178 |
logger.info(f'Rephrasing query: {query}')
|
179 |
+
|
180 |
try:
|
181 |
+
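# Extract URLs from the query (NOTE: the pattern below matches only the scheme and host; any path, query string, or fragment is left in the text after the placeholder)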
|
182 |
+
url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
|
183 |
+
urls = re.findall(url_pattern, query)
|
184 |
+
|
185 |
+
# If URLs are found, store them and replace with placeholders
|
186 |
+
url_placeholders = {}
|
187 |
+
modified_query = query
|
188 |
+
if urls:
|
189 |
+
for idx, url in enumerate(urls):
|
190 |
+
placeholder = f"__URL_{idx}__"
|
191 |
+
url_placeholders[placeholder] = url
|
192 |
+
modified_query = modified_query.replace(url, placeholder)
|
193 |
+
|
194 |
# Format recent conversation history (last 3 turns for context)
|
195 |
formatted_history = []
|
196 |
for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
|
|
|
200 |
formatted_history.append(f"Assistant: {assistant_msg}")
|
201 |
|
202 |
chat_context = "\n".join(formatted_history)
|
203 |
+
current_year = datetime.now().year
|
204 |
+
|
205 |
system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
|
206 |
|
207 |
Key Rules:
|
208 |
1. For follow-up questions or queries referencing previous conversation:
|
209 |
+
- Extract the main topic/subject from previous messages
|
210 |
+
- Combine previous context with the current query
|
211 |
+
- Example: Previous: "What is the structure of German banking industry?"
|
212 |
+
Current: "can you do more latest web search on my previous query"
|
213 |
+
Should become: "Latest structure and developments in German banking industry after: 2024"
|
|
|
214 |
|
215 |
2. Entity Handling:
|
216 |
+
- Identify and preserve main entities from context
|
217 |
+
- Enclose ONLY entity names in double quotes
|
218 |
+
- Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
|
219 |
+
- Preserve URL placeholders exactly as they appear (marked with __URL_N__)
|
220 |
|
221 |
3. Date and Time Context:
|
222 |
+
- For queries about current/latest information:
|
223 |
+
* Keep time-related words (latest, current, recent, now)
|
224 |
+
* ALWAYS append "after: YYYY" (current year)
|
225 |
+
- For specific time periods:
|
226 |
+
* Preserve the original time reference
|
227 |
+
* Add appropriate "after: YYYY" based on context
|
228 |
+
- For queries without time reference:
|
229 |
+
* Add "after: YYYY" if about current state/status
|
230 |
|
231 |
4. Query Formatting:
|
232 |
+
- Capitalize first letter
|
233 |
+
- No period at end
|
234 |
+
- Include all relevant context
|
235 |
+
- Maintain clear and searchable structure
|
236 |
+
- IMPORTANT: Keep URL placeholders (__URL_N__) exactly as they appear"""
|
|
|
237 |
|
238 |
messages = [
|
239 |
{"role": "system", "content": system_prompt},
|
240 |
{"role": "user", "content": f"""Current year: {current_year}
|
|
|
241 |
Recent conversation history:
|
242 |
{chat_context}
|
243 |
+
Current query: {modified_query}
|
|
|
|
|
244 |
Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
|
245 |
]
|
246 |
|
|
|
253 |
)
|
254 |
|
255 |
rephrased_query = response.choices[0].message.content.strip()
|
256 |
+
|
257 |
+
# Replace placeholders with original URLs
|
258 |
+
for placeholder, url in url_placeholders.items():
|
259 |
+
rephrased_query = rephrased_query.replace(placeholder, url)
|
260 |
+
|
261 |
logger.info(f'Query rephrased to: {rephrased_query}')
|
262 |
return rephrased_query
|
263 |
|