Spaces:
Sleeping
Sleeping
dolphinium
committed on
Commit
·
840c57d
1
Parent(s):
c3741ac
add history to chatbot and update solr query generation prompt errors. TODO: fix code generation for visualizations.
Browse files
app.py
CHANGED
@@ -18,6 +18,7 @@ from IPython.display import display, Markdown
|
|
18 |
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
19 |
|
20 |
# --- SSH Tunnel Configuration ---
|
|
|
21 |
SSH_HOST = os.environ.get('SSH_HOST')
|
22 |
SSH_PORT = 5322
|
23 |
SSH_USER = os.environ.get('SSH_USER')
|
@@ -35,7 +36,7 @@ SOLR_PASS = os.environ.get('SOLR_PASS')
|
|
35 |
try:
|
36 |
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
|
37 |
except Exception as e:
|
38 |
-
print(f"β Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in
|
39 |
|
40 |
# --- Global Variables ---
|
41 |
ssh_tunnel_server = None
|
@@ -62,7 +63,7 @@ try:
|
|
62 |
print(f"β
Solr connection successful on core '{SOLR_CORE_NAME}'.")
|
63 |
|
64 |
# 3. Initialize the LLM
|
65 |
-
llm_model = genai.GenerativeModel('gemini-
|
66 |
print(f"β
LLM Model '{llm_model.model_name}' initialized.")
|
67 |
|
68 |
print("β
System Initialized Successfully.")
|
@@ -223,49 +224,70 @@ formatted_field_info = format_metadata_for_prompt(field_metadata)
|
|
223 |
|
224 |
def parse_suggestions_from_report(report_text):
|
225 |
"""Extracts numbered suggestions from the report's markdown text."""
|
226 |
-
|
227 |
-
suggestions_match = re.search(r"### Suggestions for Further Exploration\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
|
228 |
if not suggestions_match: return []
|
229 |
suggestions_text = suggestions_match.group(1)
|
230 |
suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
|
231 |
return [s.strip() for s in suggestions]
|
232 |
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
|
|
|
|
|
|
|
|
239 |
|
240 |
prompt = f"""
|
241 |
You are an expert Solr query engineer who converts natural language questions into precise Solr JSON Facet API query objects. Your primary goal is to create a valid JSON object with `query` and `json.facet` keys.
|
242 |
|
243 |
---
|
244 |
-
### CONTEXT & RULES
|
245 |
|
246 |
-
1. **Today's Date for Calculations**:
|
247 |
-
2. **
|
248 |
-
3. **
|
|
|
249 |
* For searching in the main `query` parameter, ALWAYS use the multi-valued search fields (ending in `_s`, like `company_name_s`) to get comprehensive results.
|
250 |
* For grouping in a `terms` facet, ALWAYS use the canonical, single-value field (e.g., `company_name`, `molecule_name`) to ensure unique and accurate grouping.
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
|
256 |
---
|
257 |
### FIELD DEFINITIONS (Your Source of Truth)
|
258 |
|
259 |
-
{formatted_field_info}
|
260 |
---
|
261 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
-
**User Query:** "
|
264 |
|
265 |
-
**Correct JSON Output:**
|
266 |
```json
|
267 |
{{
|
268 |
-
"query": "therapeutic_category_s:infections AND date_year:
|
269 |
"json.facet": {{
|
270 |
"injection_deals": {{
|
271 |
"type": "query",
|
@@ -287,44 +309,46 @@ You are an expert Solr query engineer who converts natural language questions in
|
|
287 |
---
|
288 |
### YOUR TASK
|
289 |
|
290 |
-
Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above.
|
291 |
|
292 |
-
**User Query:**
|
293 |
"""
|
294 |
try:
|
295 |
-
# Assuming llm_model is your generative model client
|
296 |
response = llm_model.generate_content(prompt)
|
|
|
297 |
cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
|
298 |
return json.loads(cleaned_text)
|
299 |
except Exception as e:
|
300 |
-
|
|
|
301 |
return None
|
302 |
|
|
|
303 |
def llm_generate_visualization_code(query_context, facet_data):
|
304 |
"""Generates Python code for visualization based on query and data."""
|
305 |
prompt = f"""
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
try:
|
329 |
response = llm_model.generate_content(prompt)
|
330 |
code = re.sub(r'^```python\s*|\s*```$', '', response.text, flags=re.MULTILINE)
|
@@ -339,19 +363,20 @@ def execute_viz_code_and_get_path(viz_code, facet_data):
|
|
339 |
try:
|
340 |
if not os.path.exists('/tmp/plots'): os.makedirs('/tmp/plots')
|
341 |
plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
|
342 |
-
|
|
|
343 |
exec(viz_code, exec_globals)
|
344 |
fig = exec_globals.get('fig')
|
345 |
if fig:
|
346 |
fig.savefig(plot_path, bbox_inches='tight')
|
347 |
-
plt.close(fig)
|
348 |
return plot_path
|
349 |
return None
|
350 |
except Exception as e:
|
351 |
print(f"ERROR executing visualization code: {e}\n---Code---\n{viz_code}")
|
352 |
return None
|
353 |
|
354 |
-
|
355 |
def llm_generate_summary_and_suggestions_stream(query_context, facet_data):
|
356 |
"""
|
357 |
Yields a streaming analytical report and strategic, context-aware suggestions for further exploration.
|
@@ -404,7 +429,6 @@ After the report, create a final section titled `### Deeper Dive: Suggested Foll
|
|
404 |
Generate the full report and the strategic suggestions based on the user's question and the data provided.
|
405 |
"""
|
406 |
try:
|
407 |
-
# Assuming llm_model is your generative model client
|
408 |
response_stream = llm_model.generate_content(prompt, stream=True)
|
409 |
for chunk in response_stream:
|
410 |
yield chunk.text
|
@@ -412,17 +436,21 @@ Generate the full report and the strategic suggestions based on the user's quest
|
|
412 |
print(f"Error in llm_generate_summary_and_suggestions_stream: {e}")
|
413 |
yield "Sorry, I was unable to generate a summary for this data."
|
414 |
|
415 |
-
#
|
416 |
def process_analysis_flow(user_input, history, state):
|
417 |
"""
|
418 |
A generator that manages the conversation and yields tuples of UI updates for Gradio.
|
419 |
-
This version treats any user input as a new query.
|
420 |
"""
|
421 |
# Initialize state on the first run
|
422 |
if state is None:
|
423 |
state = {'query_count': 0, 'last_suggestions': []}
|
424 |
|
425 |
-
#
|
|
|
|
|
|
|
|
|
426 |
yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False))
|
427 |
|
428 |
query_context = user_input.strip()
|
@@ -435,8 +463,8 @@ def process_analysis_flow(user_input, history, state):
|
|
435 |
history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating Solr query...*"))
|
436 |
yield (history, state, None, None, None, None)
|
437 |
|
438 |
-
# 2. Generate Solr Query
|
439 |
-
llm_solr_obj =
|
440 |
if not llm_solr_obj or 'query' not in llm_solr_obj or 'json.facet' not in llm_solr_obj:
|
441 |
history.append((None, "I'm sorry, I couldn't generate a valid Solr query for that request. Please try rephrasing your question."))
|
442 |
yield (history, state, None, None, None, None)
|
@@ -482,15 +510,21 @@ def process_analysis_flow(user_input, history, state):
|
|
482 |
yield (history, state, output_plot, output_report, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
|
483 |
|
484 |
report_text = ""
|
|
|
|
|
|
|
485 |
for chunk in llm_generate_summary_and_suggestions_stream(query_context, facet_data):
|
486 |
report_text += chunk
|
487 |
-
yield (
|
|
|
|
|
|
|
488 |
|
489 |
# 6. Finalize and prompt for next action
|
490 |
state['query_count'] += 1
|
491 |
state['last_suggestions'] = parse_suggestions_from_report(report_text)
|
492 |
|
493 |
-
next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question,
|
494 |
history.append((None, next_prompt))
|
495 |
yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
|
496 |
|
@@ -500,21 +534,19 @@ def process_analysis_flow(user_input, history, state):
|
|
500 |
print(f"Error during analysis execution: {e}")
|
501 |
yield (history, state, None, None, gr.update(value=formatted_query, visible=True), None)
|
502 |
|
|
|
|
|
503 |
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
|
504 |
state = gr.State()
|
505 |
|
506 |
gr.Markdown("# π PharmaCircle AI Data Analyst")
|
507 |
-
# CHANGED: Updated introductory text for the new workflow.
|
508 |
gr.Markdown("Ask a question to begin your analysis. I will generate a Solr query, retrieve the data, create a visualization, and write a report. You can then ask follow-up questions freely.")
|
509 |
|
510 |
with gr.Row():
|
511 |
with gr.Column(scale=1):
|
512 |
-
chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True)
|
513 |
-
# CHANGED: Updated placeholder to encourage free-form questions.
|
514 |
msg_textbox = gr.Textbox(placeholder="Ask a question, e.g., 'Show me the top 5 companies by total deal value in 2023'", label="Your Question", interactive=True)
|
515 |
with gr.Row():
|
516 |
-
# REMOVED: The "Start Initial Analysis" button.
|
517 |
-
# CHANGED: The "Clear" button is now the primary action button besides submitting text.
|
518 |
clear_button = gr.Button("π Start New Analysis", variant="primary")
|
519 |
|
520 |
with gr.Column(scale=2):
|
@@ -526,28 +558,36 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}")
|
|
526 |
report_display = gr.Markdown("Report will be streamed here...", visible=False)
|
527 |
|
528 |
# --- Event Wiring ---
|
529 |
-
# REMOVED: The click handler for the old start button.
|
530 |
-
|
531 |
-
# This is now the main event handler for all user queries.
|
532 |
-
msg_textbox.submit(
|
533 |
-
fn=process_analysis_flow,
|
534 |
-
inputs=[msg_textbox, chatbot, state],
|
535 |
-
outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display]
|
536 |
-
)
|
537 |
-
|
538 |
def reset_all():
|
539 |
-
|
540 |
return (
|
541 |
-
|
542 |
-
None, # state
|
543 |
-
"", # msg_textbox
|
544 |
gr.update(value=None, visible=False), # plot_display
|
545 |
gr.update(value=None, visible=False), # report_display
|
546 |
-
gr.update(value=None, visible=False),
|
547 |
-
gr.update(value=None, visible=False)
|
548 |
)
|
549 |
|
550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
551 |
|
552 |
if is_initialized:
|
553 |
demo.queue().launch(debug=True, share=True)
|
|
|
18 |
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
19 |
|
20 |
# --- SSH Tunnel Configuration ---
|
21 |
+
# It's recommended to load secrets securely, e.g., from environment variables
|
22 |
SSH_HOST = os.environ.get('SSH_HOST')
|
23 |
SSH_PORT = 5322
|
24 |
SSH_USER = os.environ.get('SSH_USER')
|
|
|
36 |
try:
|
37 |
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
|
38 |
except Exception as e:
|
39 |
+
print(f"β Gemini API Key Error: {e}. Please ensure 'GEMINI_API_KEY' is set in your environment.")
|
40 |
|
41 |
# --- Global Variables ---
|
42 |
ssh_tunnel_server = None
|
|
|
63 |
print(f"β
Solr connection successful on core '{SOLR_CORE_NAME}'.")
|
64 |
|
65 |
# 3. Initialize the LLM
|
66 |
+
llm_model = genai.GenerativeModel('gemini-1.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
|
67 |
print(f"β
LLM Model '{llm_model.model_name}' initialized.")
|
68 |
|
69 |
print("β
System Initialized Successfully.")
|
|
|
224 |
|
225 |
def parse_suggestions_from_report(report_text):
|
226 |
"""Extracts numbered suggestions from the report's markdown text."""
|
227 |
+
suggestions_match = re.search(r"### (?:Deeper Dive: Suggested Follow-up Analyses|Suggestions for Further Exploration)\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
|
|
|
228 |
if not suggestions_match: return []
|
229 |
suggestions_text = suggestions_match.group(1)
|
230 |
suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
|
231 |
return [s.strip() for s in suggestions]
|
232 |
|
233 |
|
234 |
+
def llm_generate_solr_query_with_history(natural_language_query, field_metadata, chat_history):
|
235 |
+
"""Generates a Solr query and facet JSON from a natural language query, considering the conversation history."""
|
236 |
+
# Format the chat history for the prompt
|
237 |
+
formatted_history = ""
|
238 |
+
for user_msg, bot_msg in chat_history:
|
239 |
+
# We only need the user's queries for context, not the bot's detailed responses.
|
240 |
+
if user_msg:
|
241 |
+
# CORRECTED: Properly formatted f-string with a newline character
|
242 |
+
formatted_history += f"- User: \"{user_msg}\"\n"
|
243 |
|
244 |
prompt = f"""
|
245 |
You are an expert Solr query engineer who converts natural language questions into precise Solr JSON Facet API query objects. Your primary goal is to create a valid JSON object with `query` and `json.facet` keys.
|
246 |
|
247 |
---
|
248 |
+
### CONVERSATIONAL CONTEXT & RULES
|
249 |
|
250 |
+
1. **Today's Date for Calculations**: 2025-07-16
|
251 |
+
2. **Allowed Facet Types**: The `type` key for any facet MUST be one of the following: `terms`, `query`, or `range`. **Do not use `date_histogram`**. For time-series analysis, use a `range` facet on a date field.
|
252 |
+
3. **Field Usage**: You MUST use the fields described in the 'Field Definitions' section. Pay close attention to the definitions to select the correct field.
|
253 |
+
4. **Facet vs. Query Field Distinction**: This is critical.
|
254 |
* For searching in the main `query` parameter, ALWAYS use the multi-valued search fields (ending in `_s`, like `company_name_s`) to get comprehensive results.
|
255 |
* For grouping in a `terms` facet, ALWAYS use the canonical, single-value field (e.g., `company_name`, `molecule_name`) to ensure unique and accurate grouping.
|
256 |
+
5. **No `count(*)`**: Do NOT use functions like `count(*)`. The default facet bucket count is sufficient for counting documents.
|
257 |
+
6. **Allowed Aggregations**: For statistical facets, only use these functions: `sum`, `avg`, `min`, `max`, `unique`. The primary metric field is `total_deal_value_in_million`. The aggregation MUST be a simple string like `"sum(total_deal_value_in_million)"` and not a nested JSON object.
|
258 |
+
7. **Term Facet Limits**: Every `terms` facet MUST include a `limit` key. Default to `limit: 10` unless the user specifies a different number of top results.
|
259 |
+
8. **Output Format**: Your final output must be a single, raw JSON object and nothing else. Do not add comments, explanations, or markdown formatting like ```json.
|
260 |
|
261 |
---
|
262 |
### FIELD DEFINITIONS (Your Source of Truth)
|
263 |
|
264 |
+
`{formatted_field_info}`
|
265 |
---
|
266 |
+
### CHAT HISTORY
|
267 |
+
`{formatted_history}`
|
268 |
+
---
|
269 |
+
### EXAMPLE OF A FOLLOW-UP QUERY
|
270 |
+
|
271 |
+
**Initial User Query:** "What are the infections news in this year?"
|
272 |
+
```json
|
273 |
+
{{
|
274 |
+
"query": "date_year:2025 AND therapeutic_category_s:infections",
|
275 |
+
"json.facet": {{
|
276 |
+
"infections_news_by_type": {{
|
277 |
+
"type": "terms",
|
278 |
+
"field": "news_type",
|
279 |
+
"limit": 10
|
280 |
+
}}
|
281 |
+
}}
|
282 |
+
}}
|
283 |
+
```
|
284 |
|
285 |
+
**Follow-up User Query:** "Compare deal values for injection vs oral."
|
286 |
|
287 |
+
**Correct JSON Output for the Follow-up:**
|
288 |
```json
|
289 |
{{
|
290 |
+
"query": "therapeutic_category_s:infections AND date_year:2025 AND total_deal_value_in_million:[0 TO *]",
|
291 |
"json.facet": {{
|
292 |
"injection_deals": {{
|
293 |
"type": "query",
|
|
|
309 |
---
|
310 |
### YOUR TASK
|
311 |
|
312 |
+
Now, convert the following user query into a single, raw JSON object with 'query' and 'json.facet' keys, strictly following all rules and field definitions provided above and considering the chat history.
|
313 |
|
314 |
+
**Current User Query:** `{natural_language_query}`
|
315 |
"""
|
316 |
try:
|
|
|
317 |
response = llm_model.generate_content(prompt)
|
318 |
+
# Using a more robust regex to clean the response
|
319 |
cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
|
320 |
return json.loads(cleaned_text)
|
321 |
except Exception as e:
|
322 |
+
raw_response_text = response.text if 'response' in locals() else 'N/A'
|
323 |
+
print(f"Error in llm_generate_solr_query_with_history: {e}\nRaw Response:\n{raw_response_text}")
|
324 |
return None
|
325 |
|
326 |
+
|
327 |
def llm_generate_visualization_code(query_context, facet_data):
|
328 |
"""Generates Python code for visualization based on query and data."""
|
329 |
prompt = f"""
|
330 |
+
You are a Python Data Visualization expert specializing in Matplotlib and Seaborn.
|
331 |
+
Your task is to generate Python code to create a single, insightful visualization.
|
332 |
+
|
333 |
+
**Context:**
|
334 |
+
1. **User's Analytical Goal:** "{query_context}"
|
335 |
+
2. **Aggregated Data (from Solr Facets):**
|
336 |
+
```json
|
337 |
+
{json.dumps(facet_data, indent=2)}
|
338 |
+
```
|
339 |
+
|
340 |
+
**Instructions:**
|
341 |
+
1. **Goal:** Write Python code to generate a chart that best visualizes the answer to the user's goal using the provided data.
|
342 |
+
2. **Data Access:** The data is available in a Python dictionary named `facet_data`. Your code must parse this dictionary.
|
343 |
+
3. **Code Requirements:**
|
344 |
+
* Start with `import matplotlib.pyplot as plt` and `import seaborn as sns`.
|
345 |
+
* Use `plt.style.use('seaborn-v0_8-whitegrid')` and `fig, ax = plt.subplots(figsize=(12, 7))`. Plot using the `ax` object.
|
346 |
+
* Always include a clear `ax.set_title(...)`, `ax.set_xlabel(...)`, and `ax.set_ylabel(...)`.
|
347 |
+
* Dynamically find the primary facet key and extract the 'buckets'.
|
348 |
+
* For each bucket, extract the 'val' (label) and the relevant metric ('count' or a nested metric).
|
349 |
+
* Use `plt.tight_layout()` and rotate x-axis labels if needed.
|
350 |
+
4. **Output Format:** ONLY output raw Python code. Do not wrap it in ```python ... ```. Do not include `plt.show()` or any explanation.
|
351 |
+
"""
|
352 |
try:
|
353 |
response = llm_model.generate_content(prompt)
|
354 |
code = re.sub(r'^```python\s*|\s*```$', '', response.text, flags=re.MULTILINE)
|
|
|
363 |
try:
|
364 |
if not os.path.exists('/tmp/plots'): os.makedirs('/tmp/plots')
|
365 |
plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
|
366 |
+
# The exec environment needs access to the required libraries and the data
|
367 |
+
exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns, 'pd': pd}
|
368 |
exec(viz_code, exec_globals)
|
369 |
fig = exec_globals.get('fig')
|
370 |
if fig:
|
371 |
fig.savefig(plot_path, bbox_inches='tight')
|
372 |
+
plt.close(fig) # Important to free up memory
|
373 |
return plot_path
|
374 |
return None
|
375 |
except Exception as e:
|
376 |
print(f"ERROR executing visualization code: {e}\n---Code---\n{viz_code}")
|
377 |
return None
|
378 |
|
379 |
+
|
380 |
def llm_generate_summary_and_suggestions_stream(query_context, facet_data):
|
381 |
"""
|
382 |
Yields a streaming analytical report and strategic, context-aware suggestions for further exploration.
|
|
|
429 |
Generate the full report and the strategic suggestions based on the user's question and the data provided.
|
430 |
"""
|
431 |
try:
|
|
|
432 |
response_stream = llm_model.generate_content(prompt, stream=True)
|
433 |
for chunk in response_stream:
|
434 |
yield chunk.text
|
|
|
436 |
print(f"Error in llm_generate_summary_and_suggestions_stream: {e}")
|
437 |
yield "Sorry, I was unable to generate a summary for this data."
|
438 |
|
439 |
+
# CORRECTED: Only one, correctly implemented version of this function remains.
|
440 |
def process_analysis_flow(user_input, history, state):
|
441 |
"""
|
442 |
A generator that manages the conversation and yields tuples of UI updates for Gradio.
|
443 |
+
This version treats any user input as a new query and considers conversation history.
|
444 |
"""
|
445 |
# Initialize state on the first run
|
446 |
if state is None:
|
447 |
state = {'query_count': 0, 'last_suggestions': []}
|
448 |
|
449 |
+
# If history is None (from a reset), initialize it as an empty list
|
450 |
+
if history is None:
|
451 |
+
history = []
|
452 |
+
|
453 |
+
# Reset UI components for the new analysis, but keep chat history
|
454 |
yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False))
|
455 |
|
456 |
query_context = user_input.strip()
|
|
|
463 |
history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating Solr query...*"))
|
464 |
yield (history, state, None, None, None, None)
|
465 |
|
466 |
+
# 2. Generate Solr Query with history
|
467 |
+
llm_solr_obj = llm_generate_solr_query_with_history(query_context, field_metadata, history)
|
468 |
if not llm_solr_obj or 'query' not in llm_solr_obj or 'json.facet' not in llm_solr_obj:
|
469 |
history.append((None, "I'm sorry, I couldn't generate a valid Solr query for that request. Please try rephrasing your question."))
|
470 |
yield (history, state, None, None, None, None)
|
|
|
510 |
yield (history, state, output_plot, output_report, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
|
511 |
|
512 |
report_text = ""
|
513 |
+
# The history object is not modified during streaming, so we pass it once
|
514 |
+
# The yield statement for streaming only updates the report text
|
515 |
+
stream_history = history[:] # Make a copy
|
516 |
for chunk in llm_generate_summary_and_suggestions_stream(query_context, facet_data):
|
517 |
report_text += chunk
|
518 |
+
yield (stream_history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
|
519 |
+
|
520 |
+
# Update the main history with the final report text
|
521 |
+
history.append((None, report_text))
|
522 |
|
523 |
# 6. Finalize and prompt for next action
|
524 |
state['query_count'] += 1
|
525 |
state['last_suggestions'] = parse_suggestions_from_report(report_text)
|
526 |
|
527 |
+
next_prompt = "Analysis complete. What would you like to explore next? You can ask a follow-up question, or ask something new."
|
528 |
history.append((None, next_prompt))
|
529 |
yield (history, state, output_plot, report_text, gr.update(value=formatted_query, visible=True), gr.update(value=formatted_data, visible=True))
|
530 |
|
|
|
534 |
print(f"Error during analysis execution: {e}")
|
535 |
yield (history, state, None, None, gr.update(value=formatted_query, visible=True), None)
|
536 |
|
537 |
+
|
538 |
+
# --- Gradio UI ---
|
539 |
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
|
540 |
state = gr.State()
|
541 |
|
542 |
gr.Markdown("# π PharmaCircle AI Data Analyst")
|
|
|
543 |
gr.Markdown("Ask a question to begin your analysis. I will generate a Solr query, retrieve the data, create a visualization, and write a report. You can then ask follow-up questions freely.")
|
544 |
|
545 |
with gr.Row():
|
546 |
with gr.Column(scale=1):
|
547 |
+
chatbot = gr.Chatbot(label="Analysis Chat Log", height=700, show_copy_button=True, avatar_images=(None, "https://pharma-circle.com/images/favicon.png"))
|
|
|
548 |
msg_textbox = gr.Textbox(placeholder="Ask a question, e.g., 'Show me the top 5 companies by total deal value in 2023'", label="Your Question", interactive=True)
|
549 |
with gr.Row():
|
|
|
|
|
550 |
clear_button = gr.Button("π Start New Analysis", variant="primary")
|
551 |
|
552 |
with gr.Column(scale=2):
|
|
|
558 |
report_display = gr.Markdown("Report will be streamed here...", visible=False)
|
559 |
|
560 |
# --- Event Wiring ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
561 |
def reset_all():
|
562 |
+
"""Resets the entire UI for a new analysis session."""
|
563 |
return (
|
564 |
+
[], # chatbot (cleared)
|
565 |
+
None, # state (reset)
|
566 |
+
"", # msg_textbox (cleared)
|
567 |
gr.update(value=None, visible=False), # plot_display
|
568 |
gr.update(value=None, visible=False), # report_display
|
569 |
+
gr.update(value=None, visible=False), # solr_query_display
|
570 |
+
gr.update(value=None, visible=False) # solr_data_display
|
571 |
)
|
572 |
|
573 |
+
# Main event handler for all user queries
|
574 |
+
msg_textbox.submit(
|
575 |
+
fn=process_analysis_flow,
|
576 |
+
inputs=[msg_textbox, chatbot, state],
|
577 |
+
outputs=[chatbot, state, plot_display, report_display, solr_query_display, solr_data_display],
|
578 |
+
).then(
|
579 |
+
lambda: gr.update(value=""),
|
580 |
+
None,
|
581 |
+
[msg_textbox],
|
582 |
+
queue=False,
|
583 |
+
)
|
584 |
+
|
585 |
+
clear_button.click(
|
586 |
+
fn=reset_all,
|
587 |
+
inputs=None,
|
588 |
+
outputs=[chatbot, state, msg_textbox, plot_display, report_display, solr_query_display, solr_data_display],
|
589 |
+
queue=False
|
590 |
+
)
|
591 |
|
592 |
if is_initialized:
|
593 |
demo.queue().launch(debug=True, share=True)
|