dolphinium
committed on
Commit
·
a811652
1
Parent(s):
59c2657
Enhance error handling and logging in various modules
Browse files
- Improved exception handling in app.py to provide more specific error messages.
- Added timeout and error handling for API requests in extract_results.py.
- Implemented thread safety for Solr client access in ui.py.
- Enhanced error messages in connections.py and data_processing.py for better debugging.
- app.py +4 -1
- connections.py +21 -3
- data_processing.py +78 -57
- extract_results.py +43 -35
- ui.py +25 -22
app.py
CHANGED
@@ -29,8 +29,11 @@ def main():
|
|
29 |
demo = create_ui(llm_model, solr_client)
|
30 |
try:
|
31 |
demo.queue().launch(debug=True, share=True, allowed_paths=['/tmp/plots'])
|
32 |
-
except
|
33 |
print(f"An error occurred while launching the Gradio app: {e}")
|
|
|
|
|
|
|
34 |
finally:
|
35 |
# Ensure the SSH tunnel is closed when the app is shut down
|
36 |
print("\nClosing SSH tunnel...")
|
|
|
29 |
demo = create_ui(llm_model, solr_client)
|
30 |
try:
|
31 |
demo.queue().launch(debug=True, share=True, allowed_paths=['/tmp/plots'])
|
32 |
+
except (IOError, OSError) as e:
|
33 |
print(f"An error occurred while launching the Gradio app: {e}")
|
34 |
+
print("Please check if the port is already in use or if you have the necessary permissions.")
|
35 |
+
except Exception as e:
|
36 |
+
print(f"An unexpected error occurred: {e}")
|
37 |
finally:
|
38 |
# Ensure the SSH tunnel is closed when the app is shut down
|
39 |
print("\nClosing SSH tunnel...")
|
connections.py
CHANGED
@@ -23,6 +23,7 @@ def initialize_connections():
|
|
23 |
ssh_tunnel_server = None
|
24 |
try:
|
25 |
# 1. Configure and start the SSH Tunnel
|
|
|
26 |
ssh_tunnel_server = SSHTunnelForwarder(
|
27 |
(config.SSH_HOST, config.SSH_PORT),
|
28 |
ssh_username=config.SSH_USER,
|
@@ -36,19 +37,36 @@ def initialize_connections():
|
|
36 |
# 2. Initialize the pysolr client
|
37 |
solr_url = f'http://127.0.0.1:{ssh_tunnel_server.local_bind_port}/solr/{config.SOLR_CORE_NAME}'
|
38 |
solr_client = pysolr.Solr(solr_url, auth=(config.SOLR_USER, config.SOLR_PASS), always_commit=True)
|
39 |
-
solr_client.ping()
|
40 |
print(f"✅ Solr connection successful on core '{config.SOLR_CORE_NAME}'.")
|
41 |
|
42 |
# 3. Initialize the LLM
|
|
|
|
|
|
|
|
|
43 |
genai.configure(api_key=config.GEMINI_API_KEY)
|
44 |
-
llm_model = genai.GenerativeModel('gemini-
|
45 |
print(f"✅ LLM Model '{llm_model.model_name}' initialized.")
|
46 |
|
47 |
print("✅ System Initialized Successfully.")
|
48 |
return ssh_tunnel_server, solr_client, llm_model
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
except Exception as e:
|
51 |
-
print(f"\n❌ An error occurred during setup: {e}")
|
52 |
if ssh_tunnel_server and ssh_tunnel_server.is_active:
|
53 |
ssh_tunnel_server.stop()
|
54 |
return None, None, None
|
|
|
23 |
ssh_tunnel_server = None
|
24 |
try:
|
25 |
# 1. Configure and start the SSH Tunnel
|
26 |
+
print("Attempting to start SSH tunnel...")
|
27 |
ssh_tunnel_server = SSHTunnelForwarder(
|
28 |
(config.SSH_HOST, config.SSH_PORT),
|
29 |
ssh_username=config.SSH_USER,
|
|
|
37 |
# 2. Initialize the pysolr client
|
38 |
solr_url = f'http://127.0.0.1:{ssh_tunnel_server.local_bind_port}/solr/{config.SOLR_CORE_NAME}'
|
39 |
solr_client = pysolr.Solr(solr_url, auth=(config.SOLR_USER, config.SOLR_PASS), always_commit=True)
|
40 |
+
solr_client.ping() # Verify connection
|
41 |
print(f"✅ Solr connection successful on core '{config.SOLR_CORE_NAME}'.")
|
42 |
|
43 |
# 3. Initialize the LLM
|
44 |
+
if not config.GEMINI_API_KEY:
|
45 |
+
print("❌ CRITICAL: GEMINI_API_KEY is not set. LLM will not be available.")
|
46 |
+
raise ValueError("GEMINI_API_KEY is missing.")
|
47 |
+
|
48 |
genai.configure(api_key=config.GEMINI_API_KEY)
|
49 |
+
llm_model = genai.GenerativeModel('gemini-1.5-flash', generation_config=genai.types.GenerationConfig(temperature=0))
|
50 |
print(f"✅ LLM Model '{llm_model.model_name}' initialized.")
|
51 |
|
52 |
print("✅ System Initialized Successfully.")
|
53 |
return ssh_tunnel_server, solr_client, llm_model
|
54 |
|
55 |
+
except pysolr.SolrError as e:
|
56 |
+
print(f"\n❌ Solr Error: {e}")
|
57 |
+
print("Please check if the Solr core '{config.SOLR_CORE_NAME}' exists and is running.")
|
58 |
+
if ssh_tunnel_server and ssh_tunnel_server.is_active:
|
59 |
+
ssh_tunnel_server.stop()
|
60 |
+
return None, None, None
|
61 |
+
|
62 |
+
except ValueError as e:
|
63 |
+
print(f"\n❌ Configuration Error: {e}")
|
64 |
+
if ssh_tunnel_server and ssh_tunnel_server.is_active:
|
65 |
+
ssh_tunnel_server.stop()
|
66 |
+
return None, None, None
|
67 |
+
|
68 |
except Exception as e:
|
69 |
+
print(f"\n❌ An unexpected error occurred during setup: {e}")
|
70 |
if ssh_tunnel_server and ssh_tunnel_server.is_active:
|
71 |
ssh_tunnel_server.stop()
|
72 |
return None, None, None
|
data_processing.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
"""
|
2 |
Core data processing and analysis logic for the PharmaCircle AI Data Analyst.
|
3 |
|
@@ -9,7 +10,6 @@ This module orchestrates the main analysis workflow:
|
|
9 |
5. Synthesizes the findings into a comprehensive, user-facing report.
|
10 |
"""
|
11 |
|
12 |
-
|
13 |
import json
|
14 |
import re
|
15 |
import datetime
|
@@ -21,6 +21,7 @@ import concurrent.futures
|
|
21 |
import copy
|
22 |
import google.generativeai as genai
|
23 |
import urllib
|
|
|
24 |
|
25 |
from llm_prompts import (
|
26 |
get_analysis_plan_prompt,
|
@@ -29,7 +30,6 @@ from llm_prompts import (
|
|
29 |
)
|
30 |
from extract_results import get_search_list_params
|
31 |
|
32 |
-
|
33 |
def parse_suggestions_from_report(report_text):
|
34 |
"""Extracts numbered suggestions from the report's markdown text."""
|
35 |
suggestions_match = re.search(r"### (?:Deeper Dive: Suggested Follow-up Analyses|Suggestions for Further Exploration)\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
|
@@ -38,58 +38,52 @@ def parse_suggestions_from_report(report_text):
|
|
38 |
suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
|
39 |
return [s.strip() for s in suggestions]
|
40 |
|
41 |
-
|
42 |
def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, chat_history):
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
|
54 |
-
|
55 |
-
# Determine the core name, default to 'news' if not provided by the API
|
56 |
-
core_name = search_name if search_name else 'news'
|
57 |
-
|
58 |
-
# Apply the field mappings to the suggestions before sending them to the LLM
|
59 |
-
mapped_search_fields = []
|
60 |
-
if search_fields and field_mappings:
|
61 |
-
for field in search_fields:
|
62 |
-
original_name = field.get('field_name')
|
63 |
-
# Create a new dict to avoid modifying the original
|
64 |
-
mapped_field = field.copy()
|
65 |
-
if original_name in field_mappings:
|
66 |
-
mapped_field['field_name'] = field_mappings[original_name]
|
67 |
-
print(f"Mapped field '{original_name}' to '{mapped_field['field_name']}'")
|
68 |
-
mapped_search_fields.append(mapped_field)
|
69 |
-
else:
|
70 |
-
mapped_search_fields = search_fields
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
prompt = get_analysis_plan_prompt(natural_language_query, chat_history, mapped_search_fields, core_name)
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
|
|
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
def execute_quantitative_query(solr_client, plan):
|
91 |
"""Executes the facet query to get aggregate data."""
|
92 |
if not plan or 'quantitative_request' not in plan or 'json.facet' not in plan.get('quantitative_request', {}):
|
|
|
93 |
return None, None
|
94 |
try:
|
95 |
params = {
|
@@ -97,38 +91,43 @@ def execute_quantitative_query(solr_client, plan):
|
|
97 |
"rows": 0,
|
98 |
"json.facet": json.dumps(plan['quantitative_request']['json.facet'])
|
99 |
}
|
100 |
-
# Build the full Solr URL manually (for logging) from the client's current URL
|
101 |
base_url = f"{solr_client.url}/select"
|
102 |
query_string = urllib.parse.urlencode(params)
|
103 |
full_url = f"{base_url}?{query_string}"
|
104 |
print(f"[DEBUG] Solr QUANTITATIVE query URL: {full_url}")
|
105 |
results = solr_client.search(**params)
|
106 |
return results.raw_response.get("facets", {}), full_url
|
|
|
|
|
|
|
107 |
except Exception as e:
|
108 |
-
print(f"
|
109 |
return None, None
|
110 |
|
111 |
def execute_qualitative_query(solr_client, plan):
|
112 |
"""Executes the grouping query to get the best example docs."""
|
113 |
if not plan or 'qualitative_request' not in plan:
|
|
|
114 |
return None, None
|
115 |
try:
|
116 |
qual_request = copy.deepcopy(plan['qualitative_request'])
|
117 |
params = {
|
118 |
"q": plan.get('query_filter', '*_*'),
|
119 |
-
"rows": 5,
|
120 |
"fl": "*,score",
|
121 |
**qual_request
|
122 |
}
|
123 |
-
# Build the full Solr URL manually (for logging) from the client's current URL
|
124 |
base_url = f"{solr_client.url}/select"
|
125 |
query_string = urllib.parse.urlencode(params)
|
126 |
full_url = f"{base_url}?{query_string}"
|
127 |
print(f"[DEBUG] Solr QUALITATIVE query URL: {full_url}")
|
128 |
results = solr_client.search(**params)
|
129 |
return results.grouped, full_url
|
|
|
|
|
|
|
130 |
except Exception as e:
|
131 |
-
print(f"
|
132 |
return None, None
|
133 |
|
134 |
def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, qualitative_data, plan):
|
@@ -143,7 +142,7 @@ def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, q
|
|
143 |
yield chunk.text
|
144 |
except Exception as e:
|
145 |
print(f"Error in llm_synthesize_enriched_report_stream: {e}")
|
146 |
-
yield "Sorry,
|
147 |
|
148 |
def llm_generate_visualization_code(llm_model, query_context, facet_data):
|
149 |
"""Generates Python code for visualization based on query and data."""
|
@@ -154,23 +153,45 @@ def llm_generate_visualization_code(llm_model, query_context, facet_data):
|
|
154 |
code = re.sub(r'^```python\s*|```$', '', response.text, flags=re.MULTILINE)
|
155 |
return code
|
156 |
except Exception as e:
|
157 |
-
|
|
|
158 |
return None
|
159 |
|
160 |
def execute_viz_code_and_get_path(viz_code, facet_data):
|
161 |
"""Executes visualization code and returns the path to the saved plot image."""
|
162 |
-
if not viz_code:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
try:
|
164 |
-
if not os.path.exists('/tmp/plots'):
|
|
|
165 |
plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
|
|
|
|
|
166 |
exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns, 'pd': pd}
|
|
|
|
|
167 |
exec(viz_code, exec_globals)
|
|
|
168 |
fig = exec_globals.get('fig')
|
169 |
if fig:
|
170 |
fig.savefig(plot_path, bbox_inches='tight')
|
171 |
plt.close(fig)
|
|
|
172 |
return plot_path
|
173 |
-
|
|
|
|
|
|
|
174 |
except Exception as e:
|
175 |
-
print(f"ERROR executing visualization code:
|
176 |
-
|
|
|
|
|
|
|
|
1 |
+
|
2 |
"""
|
3 |
Core data processing and analysis logic for the PharmaCircle AI Data Analyst.
|
4 |
|
|
|
10 |
5. Synthesizes the findings into a comprehensive, user-facing report.
|
11 |
"""
|
12 |
|
|
|
13 |
import json
|
14 |
import re
|
15 |
import datetime
|
|
|
21 |
import copy
|
22 |
import google.generativeai as genai
|
23 |
import urllib
|
24 |
+
import pysolr
|
25 |
|
26 |
from llm_prompts import (
|
27 |
get_analysis_plan_prompt,
|
|
|
30 |
)
|
31 |
from extract_results import get_search_list_params
|
32 |
|
|
|
33 |
def parse_suggestions_from_report(report_text):
|
34 |
"""Extracts numbered suggestions from the report's markdown text."""
|
35 |
suggestions_match = re.search(r"### (?:Deeper Dive: Suggested Follow-up Analyses|Suggestions for Further Exploration)\s*\n(.*?)$", report_text, re.DOTALL | re.IGNORECASE)
|
|
|
38 |
suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
|
39 |
return [s.strip() for s in suggestions]
|
40 |
|
|
|
41 |
def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, chat_history):
|
42 |
+
"""
|
43 |
+
Generates a complete analysis plan from a user query, considering chat history
|
44 |
+
and dynamic field suggestions from an external API.
|
45 |
+
"""
|
46 |
+
search_fields, search_name, field_mappings = [], "", {}
|
47 |
+
try:
|
48 |
+
search_fields, search_name, field_mappings = get_search_list_params(natural_language_query)
|
49 |
+
print(f"API returned core: '{search_name}' with {len(search_fields)} fields and {len(field_mappings)} mappings.")
|
50 |
+
except Exception as e:
|
51 |
+
print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
+
core_name = search_name if search_name else 'news'
|
|
|
54 |
|
55 |
+
mapped_search_fields = []
|
56 |
+
if search_fields and field_mappings:
|
57 |
+
for field in search_fields:
|
58 |
+
original_name = field.get('field_name')
|
59 |
+
mapped_field = field.copy()
|
60 |
+
if original_name in field_mappings:
|
61 |
+
mapped_field['field_name'] = field_mappings[original_name]
|
62 |
+
print(f"Mapped field '{original_name}' to '{mapped_field['field_name']}'")
|
63 |
+
mapped_search_fields.append(mapped_field)
|
64 |
+
else:
|
65 |
+
mapped_search_fields = search_fields
|
66 |
|
67 |
+
prompt = get_analysis_plan_prompt(natural_language_query, chat_history, mapped_search_fields, core_name)
|
68 |
|
69 |
+
try:
|
70 |
+
response = llm_model.generate_content(prompt)
|
71 |
+
cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
|
72 |
+
plan = json.loads(cleaned_text)
|
73 |
+
return plan, mapped_search_fields, core_name
|
74 |
+
except json.JSONDecodeError as e:
|
75 |
+
raw_response_text = response.text if 'response' in locals() else 'N/A'
|
76 |
+
print(f"Error decoding JSON from LLM response: {e}\nRaw Response:\n{raw_response_text}")
|
77 |
+
return None, mapped_search_fields, core_name
|
78 |
+
except Exception as e:
|
79 |
+
raw_response_text = response.text if 'response' in locals() else 'N/A'
|
80 |
+
print(f"Error in llm_generate_analysis_plan_with_history: {e}\nRaw Response:\n{raw_response_text}")
|
81 |
+
return None, mapped_search_fields, core_name
|
82 |
|
83 |
def execute_quantitative_query(solr_client, plan):
|
84 |
"""Executes the facet query to get aggregate data."""
|
85 |
if not plan or 'quantitative_request' not in plan or 'json.facet' not in plan.get('quantitative_request', {}):
|
86 |
+
print("Skipping quantitative query due to incomplete plan.")
|
87 |
return None, None
|
88 |
try:
|
89 |
params = {
|
|
|
91 |
"rows": 0,
|
92 |
"json.facet": json.dumps(plan['quantitative_request']['json.facet'])
|
93 |
}
|
|
|
94 |
base_url = f"{solr_client.url}/select"
|
95 |
query_string = urllib.parse.urlencode(params)
|
96 |
full_url = f"{base_url}?{query_string}"
|
97 |
print(f"[DEBUG] Solr QUANTITATIVE query URL: {full_url}")
|
98 |
results = solr_client.search(**params)
|
99 |
return results.raw_response.get("facets", {}), full_url
|
100 |
+
except pysolr.SolrError as e:
|
101 |
+
print(f"Solr Error in quantitative query on core {solr_client.url}: {e}")
|
102 |
+
return None, None
|
103 |
except Exception as e:
|
104 |
+
print(f"Unexpected error in quantitative query: {e}")
|
105 |
return None, None
|
106 |
|
107 |
def execute_qualitative_query(solr_client, plan):
|
108 |
"""Executes the grouping query to get the best example docs."""
|
109 |
if not plan or 'qualitative_request' not in plan:
|
110 |
+
print("Skipping qualitative query due to incomplete plan.")
|
111 |
return None, None
|
112 |
try:
|
113 |
qual_request = copy.deepcopy(plan['qualitative_request'])
|
114 |
params = {
|
115 |
"q": plan.get('query_filter', '*_*'),
|
116 |
+
"rows": 5,
|
117 |
"fl": "*,score",
|
118 |
**qual_request
|
119 |
}
|
|
|
120 |
base_url = f"{solr_client.url}/select"
|
121 |
query_string = urllib.parse.urlencode(params)
|
122 |
full_url = f"{base_url}?{query_string}"
|
123 |
print(f"[DEBUG] Solr QUALITATIVE query URL: {full_url}")
|
124 |
results = solr_client.search(**params)
|
125 |
return results.grouped, full_url
|
126 |
+
except pysolr.SolrError as e:
|
127 |
+
print(f"Solr Error in qualitative query on core {solr_client.url}: {e}")
|
128 |
+
return None, None
|
129 |
except Exception as e:
|
130 |
+
print(f"Unexpected error in qualitative query: {e}")
|
131 |
return None, None
|
132 |
|
133 |
def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, qualitative_data, plan):
|
|
|
142 |
yield chunk.text
|
143 |
except Exception as e:
|
144 |
print(f"Error in llm_synthesize_enriched_report_stream: {e}")
|
145 |
+
yield "Sorry, an error occurred while generating the report. Please check the logs for details."
|
146 |
|
147 |
def llm_generate_visualization_code(llm_model, query_context, facet_data):
|
148 |
"""Generates Python code for visualization based on query and data."""
|
|
|
153 |
code = re.sub(r'^```python\s*|```$', '', response.text, flags=re.MULTILINE)
|
154 |
return code
|
155 |
except Exception as e:
|
156 |
+
raw_response_text = response.text if 'response' in locals() else 'N/A'
|
157 |
+
print(f"Error in llm_generate_visualization_code: {e}\nRaw response: {raw_response_text}")
|
158 |
return None
|
159 |
|
160 |
def execute_viz_code_and_get_path(viz_code, facet_data):
|
161 |
"""Executes visualization code and returns the path to the saved plot image."""
|
162 |
+
if not viz_code:
|
163 |
+
return None
|
164 |
+
|
165 |
+
# --- SECURITY WARNING ---
|
166 |
+
# The following code executes code generated by an LLM. This is a security
|
167 |
+
# risk and should be handled with extreme care in a production environment.
|
168 |
+
# Ideally, this code should be run in a sandboxed environment.
|
169 |
+
print("\n--- WARNING: Executing LLM-generated code. ---")
|
170 |
+
|
171 |
try:
|
172 |
+
if not os.path.exists('/tmp/plots'):
|
173 |
+
os.makedirs('/tmp/plots')
|
174 |
plot_path = f"/tmp/plots/plot_{datetime.datetime.now().timestamp()}.png"
|
175 |
+
|
176 |
+
# Create a restricted global environment for execution
|
177 |
exec_globals = {'facet_data': facet_data, 'plt': plt, 'sns': sns, 'pd': pd}
|
178 |
+
|
179 |
+
|
180 |
exec(viz_code, exec_globals)
|
181 |
+
|
182 |
fig = exec_globals.get('fig')
|
183 |
if fig:
|
184 |
fig.savefig(plot_path, bbox_inches='tight')
|
185 |
plt.close(fig)
|
186 |
+
print("--- LLM-generated code executed successfully. ---")
|
187 |
return plot_path
|
188 |
+
else:
|
189 |
+
print("--- LLM-generated code did not produce a 'fig' object. ---")
|
190 |
+
return None
|
191 |
+
|
192 |
except Exception as e:
|
193 |
+
print(f"\n--- ERROR executing visualization code: ---")
|
194 |
+
print(f"Error: {e}")
|
195 |
+
print(f"--- Code---\n{viz_code}")
|
196 |
+
print("-----------------------------------------")
|
197 |
+
return None
|
extract_results.py
CHANGED
@@ -26,42 +26,50 @@ def get_search_list_params(query, k=20):
|
|
26 |
"""
|
27 |
url = "https://aitest.ebalina.com/stream"
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
|
60 |
-
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
26 |
"""
|
27 |
url = "https://aitest.ebalina.com/stream"
|
28 |
|
29 |
+
try:
|
30 |
+
response = requests.post(
|
31 |
+
url,
|
32 |
+
headers={'Content-Type': 'application/json'},
|
33 |
+
json={"query": query, "k": k},
|
34 |
+
stream=True,
|
35 |
+
timeout=30 # Add a 30-second timeout
|
36 |
+
)
|
37 |
+
response.raise_for_status() # Raise an exception for bad status codes
|
38 |
+
|
39 |
+
search_fields = []
|
40 |
+
search_name = ""
|
41 |
+
field_mappings_str = ""
|
42 |
|
43 |
+
for line in response.iter_lines():
|
44 |
+
if line and line.startswith(b'data: '):
|
45 |
+
try:
|
46 |
+
line_str = line.decode('utf-8')[6:]
|
47 |
+
if not line_str or line_str.isspace():
|
48 |
+
continue
|
49 |
+
|
50 |
+
data = json.loads(line_str)
|
51 |
+
log_title = data.get('log_title')
|
52 |
|
53 |
+
if log_title == 'Search List Result':
|
54 |
+
content = data.get('content', '')
|
55 |
+
if content:
|
56 |
+
yaml_data = yaml.safe_load(content)
|
57 |
+
print("DEBUG:", yaml_data)
|
58 |
+
# This is the dynamic core name
|
59 |
+
search_name = yaml_data.get('search_name', '')
|
60 |
+
search_fields = yaml_data.get('search_fields', [])
|
61 |
|
62 |
+
elif log_title == 'Field Mapping Outputs':
|
63 |
+
field_mappings_str = data.get('content', '')
|
64 |
|
65 |
+
except (json.JSONDecodeError, yaml.YAMLError, AttributeError) as e:
|
66 |
+
print(f"Error parsing stream line: {e}\nLine: {line_str}")
|
67 |
+
continue
|
68 |
+
|
69 |
+
field_mappings = _parse_mappings(field_mappings_str)
|
70 |
+
|
71 |
+
return search_fields, search_name, field_mappings
|
72 |
+
|
73 |
+
except requests.exceptions.RequestException as e:
|
74 |
+
print(f"Error connecting to the external API: {e}")
|
75 |
+
return [], "", {}
|
ui.py
CHANGED
@@ -11,6 +11,7 @@ by calling functions from the data_processing module.
|
|
11 |
import gradio as gr
|
12 |
import json
|
13 |
import concurrent.futures
|
|
|
14 |
from data_processing import (
|
15 |
llm_generate_analysis_plan_with_history,
|
16 |
execute_quantitative_query,
|
@@ -21,6 +22,9 @@ from data_processing import (
|
|
21 |
parse_suggestions_from_report
|
22 |
)
|
23 |
|
|
|
|
|
|
|
24 |
|
25 |
def create_ui(llm_model, solr_client):
|
26 |
"""
|
@@ -121,28 +125,27 @@ def create_ui(llm_model, solr_client):
|
|
121 |
yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
|
122 |
|
123 |
# --- DYNAMIC CORE SWITCH (Thread-safe) ---
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
# ---
|
146 |
|
147 |
if not aggregate_data or aggregate_data.get('count', 0) == 0:
|
148 |
history.append((None, f"No data was found for your query in the '{core_name}' core. Please try a different question."))
|
|
|
11 |
import gradio as gr
|
12 |
import json
|
13 |
import concurrent.futures
|
14 |
+
import threading
|
15 |
from data_processing import (
|
16 |
llm_generate_analysis_plan_with_history,
|
17 |
execute_quantitative_query,
|
|
|
22 |
parse_suggestions_from_report
|
23 |
)
|
24 |
|
25 |
+
# Create a lock to protect the Solr client from concurrent access
|
26 |
+
solr_lock = threading.Lock()
|
27 |
+
|
28 |
|
29 |
def create_ui(llm_model, solr_client):
|
30 |
"""
|
|
|
125 |
yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
|
126 |
|
127 |
# --- DYNAMIC CORE SWITCH (Thread-safe) ---
|
128 |
+
with solr_lock:
|
129 |
+
original_solr_url = solr_client.url
|
130 |
+
# Correctly construct the new URL by replacing the last component (the core name)
|
131 |
+
base_url = original_solr_url.rsplit('/', 1)[0]
|
132 |
+
new_url = f"{base_url}/{core_name}"
|
133 |
+
solr_client.url = new_url
|
134 |
+
print(f"[INFO] Switched Solr client to core: {core_name} at URL: {solr_client.url}")
|
135 |
+
|
136 |
+
# Execute queries in parallel
|
137 |
+
aggregate_data, quantitative_url = None, None
|
138 |
+
example_data, qualitative_url = None, None
|
139 |
+
try:
|
140 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
141 |
+
future_agg = executor.submit(execute_quantitative_query, solr_client, analysis_plan)
|
142 |
+
future_ex = executor.submit(execute_qualitative_query, solr_client, analysis_plan)
|
143 |
+
aggregate_data, quantitative_url = future_agg.result()
|
144 |
+
example_data, qualitative_url = future_ex.result()
|
145 |
+
finally:
|
146 |
+
# --- IMPORTANT: Reset client to default URL ---
|
147 |
+
solr_client.url = original_solr_url
|
148 |
+
print(f"[INFO] Reset Solr client to default URL: {original_solr_url}")
|
|
|
149 |
|
150 |
if not aggregate_data or aggregate_data.get('count', 0) == 0:
|
151 |
history.append((None, f"No data was found for your query in the '{core_name}' core. Please try a different question."))
|