Shreyas094 committed on
Commit
8552f58
·
verified ·
1 Parent(s): 4040242

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -30
app.py CHANGED
@@ -174,9 +174,23 @@ Guidelines:
174
  return f"I apologize, but I encountered an error while processing your query: {str(e)}"
175
 
176
  async def rephrase_query(chat_history, query, temperature=0.2) -> str:
177
- """Rephrase the query based on chat history and context."""
178
  logger.info(f'Rephrasing query: {query}')
 
179
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  # Format recent conversation history (last 3 turns for context)
181
  formatted_history = []
182
  for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
@@ -186,51 +200,47 @@ async def rephrase_query(chat_history, query, temperature=0.2) -> str:
186
  formatted_history.append(f"Assistant: {assistant_msg}")
187
 
188
  chat_context = "\n".join(formatted_history)
189
- current_year = datetime.now().year
190
-
191
  system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
192
 
193
  Key Rules:
194
  1. For follow-up questions or queries referencing previous conversation:
195
- - Extract the main topic/subject from previous messages
196
- - Combine previous context with the current query
197
- - Example:
198
- Previous: "What is the structure of German banking industry?"
199
- Current: "can you do more latest web search on my previous query"
200
- Should become: "Latest structure and developments in German banking industry after: 2024"
201
 
202
  2. Entity Handling:
203
- - Identify and preserve main entities from context
204
- - Enclose ONLY entity names in double quotes
205
- - Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
 
206
 
207
  3. Date and Time Context:
208
- - For queries about current/latest information:
209
- * Keep time-related words (latest, current, recent, now)
210
- * ALWAYS append "after: YYYY" (current year)
211
- - For specific time periods:
212
- * Preserve the original time reference
213
- * Add appropriate "after: YYYY" based on context
214
- - For queries without time reference:
215
- * Add "after: YYYY" if about current state/status
216
 
217
  4. Query Formatting:
218
- - Capitalize first letter
219
- - No period at end
220
- - Include all relevant context
221
- - Maintain clear and searchable structure
222
-
223
- Remember: Your goal is to create a complete, self-contained query that includes all necessary context from the conversation history."""
224
 
225
  messages = [
226
  {"role": "system", "content": system_prompt},
227
  {"role": "user", "content": f"""Current year: {current_year}
228
-
229
  Recent conversation history:
230
  {chat_context}
231
-
232
- Current query: {query}
233
-
234
  Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
235
  ]
236
 
@@ -243,6 +253,11 @@ Please rephrase this query into a complete, contextual search query following th
243
  )
244
 
245
  rephrased_query = response.choices[0].message.content.strip()
 
 
 
 
 
246
  logger.info(f'Query rephrased to: {rephrased_query}')
247
  return rephrased_query
248
 
 
174
  return f"I apologize, but I encountered an error while processing your query: {str(e)}"
175
 
176
  async def rephrase_query(chat_history, query, temperature=0.2) -> str:
177
+ """Rephrase the query based on chat history and context while preserving URLs."""
178
  logger.info(f'Rephrasing query: {query}')
179
+
180
  try:
181
+ # Extract URLs from the query
182
+ url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
183
+ urls = re.findall(url_pattern, query)
184
+
185
+ # If URLs are found, store them and replace with placeholders
186
+ url_placeholders = {}
187
+ modified_query = query
188
+ if urls:
189
+ for idx, url in enumerate(urls):
190
+ placeholder = f"__URL_{idx}__"
191
+ url_placeholders[placeholder] = url
192
+ modified_query = modified_query.replace(url, placeholder)
193
+
194
  # Format recent conversation history (last 3 turns for context)
195
  formatted_history = []
196
  for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
 
200
  formatted_history.append(f"Assistant: {assistant_msg}")
201
 
202
  chat_context = "\n".join(formatted_history)
203
+ current_year = datetime.now().year
204
+
205
  system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
206
 
207
  Key Rules:
208
  1. For follow-up questions or queries referencing previous conversation:
209
+ - Extract the main topic/subject from previous messages
210
+ - Combine previous context with the current query
211
+ - Example: Previous: "What is the structure of German banking industry?"
212
+ Current: "can you do more latest web search on my previous query"
213
+ Should become: "Latest structure and developments in German banking industry after: 2024"
 
214
 
215
  2. Entity Handling:
216
+ - Identify and preserve main entities from context
217
+ - Enclose ONLY entity names in double quotes
218
+ - Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
219
+ - Preserve URL placeholders exactly as they appear (marked with __URL_N__)
220
 
221
  3. Date and Time Context:
222
+ - For queries about current/latest information:
223
+ * Keep time-related words (latest, current, recent, now)
224
+ * ALWAYS append "after: YYYY" (current year)
225
+ - For specific time periods:
226
+ * Preserve the original time reference
227
+ * Add appropriate "after: YYYY" based on context
228
+ - For queries without time reference:
229
+ * Add "after: YYYY" if about current state/status
230
 
231
  4. Query Formatting:
232
+ - Capitalize first letter
233
+ - No period at end
234
+ - Include all relevant context
235
+ - Maintain clear and searchable structure
236
+ - IMPORTANT: Keep URL placeholders (__URL_N__) exactly as they appear"""
 
237
 
238
  messages = [
239
  {"role": "system", "content": system_prompt},
240
  {"role": "user", "content": f"""Current year: {current_year}
 
241
  Recent conversation history:
242
  {chat_context}
243
+ Current query: {modified_query}
 
 
244
  Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
245
  ]
246
 
 
253
  )
254
 
255
  rephrased_query = response.choices[0].message.content.strip()
256
+
257
+ # Replace placeholders with original URLs
258
+ for placeholder, url in url_placeholders.items():
259
+ rephrased_query = rephrased_query.replace(placeholder, url)
260
+
261
  logger.info(f'Query rephrased to: {rephrased_query}')
262
  return rephrased_query
263