Shreyas094 committed on
Commit
4040242
·
verified ·
1 Parent(s): d5ce886

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -41
app.py CHANGED
@@ -177,53 +177,63 @@ async def rephrase_query(chat_history, query, temperature=0.2) -> str:
177
  """Rephrase the query based on chat history and context."""
178
  logger.info(f'Rephrasing query: {query}')
179
  try:
180
- # Format chat history for context
181
  formatted_history = []
182
- for user_msg, assistant_msg in chat_history:
183
- formatted_history.append({"role": "user", "content": user_msg})
184
- if assistant_msg: # Only add if there's an assistant message
185
- formatted_history.append({"role": "assistant", "content": assistant_msg})
 
186
 
 
187
  current_year = datetime.now().year
188
- system_prompt = """You are a highly intelligent and context-aware query rephrasing assistant. Your task is to rephrase search queries while following these strict rules:
189
-
190
- 1. Entity Handling:
191
- - Identify main entities (organizations, brands, products, locations)
192
- - Enclose ONLY the entity names in double quotes
193
- - Example: "Apple" stock price, not "Apple stock price"
194
-
195
- 2. Date Handling Rules (VERY IMPORTANT):
196
- - For queries about current/latest/recent information:
197
- * If query contains words like "latest", "current", "recent", "now", "today":
198
- - Keep these words in the query
199
- - ALWAYS append "after: YYYY" (current year) at the end
200
- * Example: "latest news on "Apple"" becomes "latest news on "Apple" after: 2024"
201
-
202
- - For queries with specific time periods:
203
- * Keep the original time reference
204
- * Add appropriate "after: YYYY" based on the mentioned year
205
- * Example: "How did "Bank of America" perform in Q2 2023" becomes
206
- "How did "Bank of America" perform in Q2 2023 after: 2023"
207
-
208
- - For queries without any time reference:
209
- * ALWAYS append "after: YYYY" (current year) at the end
210
- * Example: ""Toyota" market share" becomes ""Toyota" market share after: 2024"
211
-
212
- 3. Output Format:
213
- - First letter should be capitalized
214
- - No period at the end
215
- - Include all specified date operators
216
- - Maintain the entire original query's meaning and context
217
-
218
- Remember: EVERY query must end with a date operator unless it explicitly references a past date/year."""
219
-
220
- # Prepare messages for the API call
 
 
221
  messages = [
222
  {"role": "system", "content": system_prompt},
223
- {"role": "user", "content": f"Current year is {current_year}. Rephrase this query: {query}"}
 
 
 
 
 
 
 
224
  ]
225
 
226
- # Call Groq API
227
  response = groq_client.chat.completions.create(
228
  messages=messages,
229
  model="llama-3.1-70b-versatile",
@@ -238,7 +248,14 @@ Remember: EVERY query must end with a date operator unless it explicitly referen
238
 
239
  except Exception as e:
240
  logger.error(f'Error rephrasing query: {e}')
241
- return query # Return original query if rephrasing fails
 
 
 
 
 
 
 
242
 
243
  class ParallelScraper:
244
  def __init__(self, max_workers: int = 5):
 
177
  """Rephrase the query based on chat history and context."""
178
  logger.info(f'Rephrasing query: {query}')
179
  try:
180
+ # Format recent conversation history (last 3 turns for context)
181
  formatted_history = []
182
+ for i, (user_msg, assistant_msg) in enumerate(chat_history[-3:], 1):
183
+ formatted_history.append(f"Turn {i}:")
184
+ formatted_history.append(f"User: {user_msg}")
185
+ if assistant_msg:
186
+ formatted_history.append(f"Assistant: {assistant_msg}")
187
 
188
+ chat_context = "\n".join(formatted_history)
189
  current_year = datetime.now().year
190
+
191
+ system_prompt = """You are a highly intelligent query rephrasing assistant. Your task is to analyze the conversation history and current query to generate a complete, contextual search query.
192
+
193
+ Key Rules:
194
+ 1. For follow-up questions or queries referencing previous conversation:
195
+ - Extract the main topic/subject from previous messages
196
+ - Combine previous context with the current query
197
+ - Example:
198
+ Previous: "What is the structure of German banking industry?"
199
+ Current: "can you do more latest web search on my previous query"
200
+ Should become: "Latest structure and developments in German banking industry after: 2024"
201
+
202
+ 2. Entity Handling:
203
+ - Identify and preserve main entities from context
204
+ - Enclose ONLY entity names in double quotes
205
+ - Example: "Deutsche Bank" profits, not "Deutsche Bank profits"
206
+
207
+ 3. Date and Time Context:
208
+ - For queries about current/latest information:
209
+ * Keep time-related words (latest, current, recent, now)
210
+ * ALWAYS append "after: YYYY" (current year)
211
+ - For specific time periods:
212
+ * Preserve the original time reference
213
+ * Add appropriate "after: YYYY" based on context
214
+ - For queries without time reference:
215
+ * Add "after: YYYY" if about current state/status
216
+
217
+ 4. Query Formatting:
218
+ - Capitalize first letter
219
+ - No period at end
220
+ - Include all relevant context
221
+ - Maintain clear and searchable structure
222
+
223
+ Remember: Your goal is to create a complete, self-contained query that includes all necessary context from the conversation history."""
224
+
225
  messages = [
226
  {"role": "system", "content": system_prompt},
227
+ {"role": "user", "content": f"""Current year: {current_year}
228
+
229
+ Recent conversation history:
230
+ {chat_context}
231
+
232
+ Current query: {query}
233
+
234
+ Please rephrase this query into a complete, contextual search query following the rules above. The rephrased query should be clear and complete even without the conversation context."""}
235
  ]
236
 
 
237
  response = groq_client.chat.completions.create(
238
  messages=messages,
239
  model="llama-3.1-70b-versatile",
 
248
 
249
  except Exception as e:
250
  logger.error(f'Error rephrasing query: {e}')
251
+ # If rephrasing fails, construct a basic contextual query
252
+ try:
253
+ last_query = chat_history[-1][0] if chat_history else ""
254
+ if any(word in query.lower() for word in ['latest', 'recent', 'current', 'now', 'update']):
255
+ return f"{last_query} latest updates after: {datetime.now().year}"
256
+ return query
257
+ except:
258
+ return query # Return original query as last resort
259
 
260
  class ParallelScraper:
261
  def __init__(self, max_workers: int = 5):