Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,24 @@
|
|
1 |
import json
|
|
|
2 |
import requests
|
3 |
import streamlit as st
|
4 |
import pdfplumber
|
5 |
import pandas as pd
|
6 |
import sqlalchemy
|
7 |
-
from typing import Any, Dict, List, Optional, Union
|
8 |
from functools import lru_cache
|
9 |
|
10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
try:
|
12 |
from openai import OpenAI
|
13 |
except ImportError:
|
@@ -19,6 +30,13 @@ except ImportError:
|
|
19 |
groq = None
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
class SyntheticDataGenerator:
|
23 |
"""World's Most Advanced Synthetic Data Generation System"""
|
24 |
|
@@ -46,42 +64,36 @@ class SyntheticDataGenerator:
|
|
46 |
}
|
47 |
|
48 |
def __init__(self) -> None:
|
49 |
-
"""Initialize session state, input handlers, and providers."""
|
50 |
self._init_session_state()
|
51 |
-
self._setup_input_handlers()
|
52 |
self._setup_providers()
|
|
|
53 |
|
54 |
def _init_session_state(self) -> None:
|
55 |
-
"""Initialize
|
56 |
defaults = {
|
57 |
"active_provider": "OpenAI",
|
58 |
"api_keys": {},
|
59 |
-
"
|
60 |
-
"generation_results": [],
|
61 |
-
"system_metrics": {
|
62 |
-
"api_calls": 0,
|
63 |
-
"tokens_used": 0,
|
64 |
-
"error_count": 0,
|
65 |
-
},
|
66 |
"error_logs": [],
|
67 |
"debug_mode": False,
|
68 |
}
|
69 |
-
for key,
|
70 |
if key not in st.session_state:
|
71 |
-
st.session_state[key] =
|
72 |
|
73 |
def _setup_providers(self) -> None:
|
74 |
-
"""Configure available providers
|
75 |
self.available_providers: List[str] = []
|
76 |
for provider, config in self.PROVIDER_CONFIG.items():
|
77 |
required_lib = config.get("requires_library")
|
78 |
if required_lib and not globals().get(required_lib.title()):
|
79 |
-
|
|
|
80 |
self.available_providers.append(provider)
|
81 |
|
82 |
def _setup_input_handlers(self) -> None:
|
83 |
-
"""Register
|
84 |
-
self.input_processors = {
|
85 |
"text": self._process_text,
|
86 |
"pdf": self._process_pdf,
|
87 |
"csv": self._process_csv,
|
@@ -93,65 +105,53 @@ class SyntheticDataGenerator:
|
|
93 |
@lru_cache(maxsize=100)
|
94 |
def generate(self, provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
95 |
"""
|
96 |
-
Unified generation endpoint with failover support.
|
97 |
-
Caches results to improve performance.
|
98 |
"""
|
99 |
try:
|
100 |
if provider not in self.available_providers:
|
101 |
-
raise
|
102 |
-
|
103 |
client = self._get_client(provider)
|
104 |
if not client:
|
105 |
-
raise
|
106 |
-
|
107 |
return self._execute_generation(client, provider, model, prompt)
|
108 |
-
|
109 |
except Exception as e:
|
110 |
-
self._log_error(f"Generation
|
111 |
return self._failover_generation(provider, model, prompt)
|
112 |
|
113 |
def _get_client(self, provider: str) -> Any:
|
114 |
"""
|
115 |
-
|
116 |
-
Raises
|
117 |
"""
|
118 |
config = self.PROVIDER_CONFIG[provider]
|
119 |
api_key = st.session_state["api_keys"].get(provider, "")
|
120 |
-
|
121 |
if not api_key:
|
122 |
-
raise
|
123 |
-
|
124 |
try:
|
125 |
if provider == "Groq":
|
126 |
return groq.Groq(api_key=api_key)
|
127 |
elif provider == "HuggingFace":
|
128 |
return {"headers": {"Authorization": f"Bearer {api_key}"}}
|
129 |
else:
|
130 |
-
# Initialize OpenAI client for OpenAI and Deepseek
|
131 |
return OpenAI(
|
132 |
base_url=config["base_url"],
|
133 |
api_key=api_key,
|
134 |
-
timeout=30
|
135 |
)
|
136 |
except Exception as e:
|
137 |
-
self._log_error(f"
|
138 |
-
|
139 |
|
140 |
def _execute_generation(self, client: Any, provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
141 |
"""
|
142 |
-
Execute
|
143 |
-
|
144 |
"""
|
145 |
st.session_state["system_metrics"]["api_calls"] += 1
|
146 |
|
147 |
if provider == "HuggingFace":
|
148 |
url = self.PROVIDER_CONFIG[provider]["base_url"] + model
|
149 |
-
response = requests.post(
|
150 |
-
url,
|
151 |
-
headers=client["headers"],
|
152 |
-
json={"inputs": prompt},
|
153 |
-
timeout=30
|
154 |
-
)
|
155 |
response.raise_for_status()
|
156 |
return response.json()
|
157 |
else:
|
@@ -159,77 +159,70 @@ class SyntheticDataGenerator:
|
|
159 |
model=model,
|
160 |
messages=[{"role": "user", "content": prompt}],
|
161 |
temperature=0.1,
|
162 |
-
max_tokens=2000
|
163 |
)
|
164 |
-
# Update token usage if available
|
165 |
if hasattr(completion.usage, "total_tokens"):
|
166 |
st.session_state["system_metrics"]["tokens_used"] += completion.usage.total_tokens
|
167 |
try:
|
168 |
-
|
169 |
-
except
|
170 |
-
|
171 |
-
return result
|
172 |
|
173 |
def _failover_generation(self, original_provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
174 |
"""
|
175 |
-
|
176 |
-
Tries available providers (excluding the original) until one succeeds.
|
177 |
"""
|
178 |
for backup_provider in self.available_providers:
|
179 |
if backup_provider == original_provider:
|
180 |
continue
|
181 |
-
# Determine model to use: if the desired model is available, use it; otherwise use the first available model.
|
182 |
backup_models = self.PROVIDER_CONFIG[backup_provider]["models"]
|
183 |
backup_model = model if model in backup_models else backup_models[0]
|
184 |
try:
|
185 |
st.session_state["active_provider"] = backup_provider
|
186 |
result = self.generate(backup_provider, backup_model, prompt)
|
187 |
-
|
188 |
-
self._log_error(f"Failover succeeded with provider '{backup_provider}' using model '{backup_model}'.")
|
189 |
return result
|
190 |
except Exception as e:
|
191 |
-
self._log_error(f"Failover attempt with
|
192 |
-
|
193 |
-
raise RuntimeError("All generation providers are unavailable.")
|
194 |
|
195 |
# --- Input Processors ---
|
196 |
def _process_text(self, text: str) -> str:
|
197 |
-
"""
|
198 |
return text.strip()
|
199 |
|
200 |
def _process_pdf(self, file) -> str:
|
201 |
-
"""
|
202 |
try:
|
203 |
with pdfplumber.open(file) as pdf:
|
204 |
-
return "\n".join(page.extract_text() or "" for page in pdf.pages)
|
205 |
except Exception as e:
|
206 |
-
self._log_error(f"PDF
|
207 |
return ""
|
208 |
|
209 |
def _process_csv(self, file) -> str:
|
210 |
-
"""
|
211 |
try:
|
212 |
df = pd.read_csv(file)
|
213 |
return df.to_csv(index=False)
|
214 |
except Exception as e:
|
215 |
-
self._log_error(f"CSV
|
216 |
return ""
|
217 |
|
218 |
def _process_api(self, api_url: str) -> str:
|
219 |
-
"""
|
220 |
try:
|
221 |
response = requests.get(api_url, timeout=10)
|
222 |
response.raise_for_status()
|
223 |
-
|
224 |
-
return json.dumps(data, indent=2)
|
225 |
except Exception as e:
|
226 |
-
self._log_error(f"API
|
227 |
return ""
|
228 |
|
229 |
def _process_database(self, config: Dict[str, str]) -> str:
|
230 |
"""
|
231 |
-
|
232 |
-
Expects a
|
233 |
"""
|
234 |
try:
|
235 |
connection_string = config.get("connection_string", "")
|
@@ -238,45 +231,43 @@ class SyntheticDataGenerator:
|
|
238 |
raise ValueError("Missing connection string or query.")
|
239 |
engine = sqlalchemy.create_engine(connection_string)
|
240 |
with engine.connect() as connection:
|
241 |
-
|
242 |
-
return
|
243 |
except Exception as e:
|
244 |
-
self._log_error(f"Database
|
245 |
return ""
|
246 |
|
247 |
def _process_web(self, url: str) -> str:
|
248 |
-
"""
|
249 |
try:
|
250 |
-
response = requests.get(url, headers={
|
251 |
-
"User-Agent": "Mozilla/5.0 (compatible; SyntheticBot/1.0)"
|
252 |
-
}, timeout=10)
|
253 |
response.raise_for_status()
|
254 |
return response.text
|
255 |
except Exception as e:
|
256 |
-
self._log_error(f"Web
|
257 |
return ""
|
258 |
|
259 |
-
# ---
|
260 |
def _log_error(self, message: str) -> None:
|
261 |
-
"""
|
262 |
st.session_state["system_metrics"]["error_count"] += 1
|
263 |
st.session_state["error_logs"].append(message)
|
|
|
264 |
if st.session_state.get("debug_mode"):
|
265 |
st.error(f"[DEBUG] {message}")
|
266 |
|
267 |
def health_check(self) -> Dict[str, Any]:
|
268 |
-
"""
|
|
|
|
|
269 |
return {
|
270 |
"providers_available": self.available_providers,
|
271 |
-
"api_connectivity":
|
272 |
-
provider: self._test_provider_connectivity(provider)
|
273 |
-
for provider in self.available_providers
|
274 |
-
},
|
275 |
"system_metrics": st.session_state["system_metrics"],
|
276 |
}
|
277 |
|
278 |
def _test_provider_connectivity(self, provider: str) -> bool:
|
279 |
-
"""
|
280 |
try:
|
281 |
client = self._get_client(provider)
|
282 |
if provider == "HuggingFace":
|
@@ -284,23 +275,23 @@ class SyntheticDataGenerator:
|
|
284 |
response = requests.get(url, headers=client["headers"], timeout=5)
|
285 |
return response.status_code == 200
|
286 |
else:
|
287 |
-
# Assuming client has a models.list() method for other providers.
|
288 |
client.models.list()
|
289 |
return True
|
290 |
-
except Exception:
|
|
|
291 |
return False
|
292 |
|
293 |
|
294 |
-
# ---
|
295 |
-
def provider_config_ui(
|
296 |
-
"""
|
297 |
with st.sidebar:
|
298 |
st.header("⚙️ AI Engine Configuration")
|
299 |
provider = st.selectbox(
|
300 |
"AI Provider",
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
)
|
305 |
st.session_state["active_provider"] = provider
|
306 |
|
@@ -308,30 +299,25 @@ def provider_config_ui(gen: SyntheticDataGenerator) -> None:
|
|
308 |
f"{provider} API Key",
|
309 |
type="password",
|
310 |
value=st.session_state["api_keys"].get(provider, ""),
|
311 |
-
help=f"
|
312 |
)
|
313 |
st.session_state["api_keys"][provider] = api_key
|
314 |
|
315 |
model = st.selectbox(
|
316 |
"Model",
|
317 |
-
|
318 |
-
help="Select model
|
319 |
)
|
320 |
|
321 |
if st.button("Run Health Check"):
|
322 |
-
report =
|
323 |
st.json(report)
|
324 |
|
325 |
|
326 |
def main() -> None:
|
327 |
-
"""
|
328 |
-
st.set_page_config(
|
329 |
-
|
330 |
-
page_icon="🏭",
|
331 |
-
layout="wide"
|
332 |
-
)
|
333 |
-
|
334 |
-
gen = SyntheticDataGenerator()
|
335 |
|
336 |
st.title("🏭 Synthetic Data Factory Pro")
|
337 |
st.markdown(
|
@@ -341,13 +327,13 @@ def main() -> None:
|
|
341 |
"""
|
342 |
)
|
343 |
|
344 |
-
provider_config_ui(
|
345 |
|
346 |
-
#
|
347 |
st.subheader("Input Data")
|
348 |
-
input_type = st.selectbox("Select Input Type", list(
|
349 |
if input_type == "text":
|
350 |
-
user_input = st.text_area("Enter your text
|
351 |
elif input_type == "pdf":
|
352 |
user_input = st.file_uploader("Upload a PDF file", type=["pdf"])
|
353 |
elif input_type == "csv":
|
@@ -361,29 +347,32 @@ def main() -> None:
|
|
361 |
|
362 |
processed_input = ""
|
363 |
if st.button("Process Input"):
|
364 |
-
processor =
|
365 |
if processor:
|
366 |
-
if input_type in
|
367 |
processed_input = processor(user_input)
|
368 |
elif input_type == "database":
|
369 |
try:
|
370 |
-
|
371 |
-
processed_input = processor(
|
372 |
-
except
|
373 |
-
st.error("Invalid JSON for database
|
|
|
374 |
else:
|
375 |
processed_input = processor(user_input)
|
376 |
st.text_area("Processed Input", value=processed_input, height=200)
|
377 |
|
378 |
-
# Generation
|
379 |
st.subheader("Generate Synthetic Data")
|
380 |
-
prompt = st.text_area("Enter your prompt
|
381 |
if st.button("Generate"):
|
382 |
active_provider = st.session_state.get("active_provider", "OpenAI")
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
387 |
|
388 |
|
389 |
if __name__ == "__main__":
|
|
|
1 |
import json
|
2 |
+
import logging
|
3 |
import requests
|
4 |
import streamlit as st
|
5 |
import pdfplumber
|
6 |
import pandas as pd
|
7 |
import sqlalchemy
|
8 |
+
from typing import Any, Dict, List, Optional, Union, Callable
|
9 |
from functools import lru_cache
|
10 |
|
11 |
+
# --- Logging Configuration ---
# Module-wide logger; a console handler is attached only when the logger has
# none yet, so executing this module again does not stack duplicate handlers.
logger = logging.getLogger("SyntheticDataGenerator")
logger.setLevel(logging.DEBUG)
if len(logger.handlers) == 0:
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    ch.setLevel(logging.DEBUG)
    logger.addHandler(ch)
|
20 |
+
|
21 |
+
# --- Provider Clients with Import Guards ---
|
22 |
try:
|
23 |
from openai import OpenAI
|
24 |
except ImportError:
|
|
|
30 |
groq = None
|
31 |
|
32 |
|
33 |
+
# --- Custom Exceptions ---
|
34 |
+
class ProviderClientError(Exception):
    """Signal a problem with a provider client (custom exception type)."""
|
37 |
+
|
38 |
+
|
39 |
+
# --- Core Synthetic Data Generator ---
|
40 |
class SyntheticDataGenerator:
|
41 |
"""World's Most Advanced Synthetic Data Generation System"""
|
42 |
|
|
|
64 |
}
|
65 |
|
66 |
def __init__(self) -> None:
|
|
|
67 |
self._init_session_state()
|
|
|
68 |
self._setup_providers()
|
69 |
+
self._setup_input_handlers()
|
70 |
|
71 |
def _init_session_state(self) -> None:
    """Populate ``st.session_state`` with defaults for keys it does not hold yet."""
    initial_values = {
        "active_provider": "OpenAI",
        "api_keys": {},
        "system_metrics": {"api_calls": 0, "tokens_used": 0, "error_count": 0},
        "error_logs": [],
        "debug_mode": False,
    }
    for name, default in initial_values.items():
        # Keys that are already present keep their current value.
        if name in st.session_state:
            continue
        st.session_state[name] = default
|
83 |
|
84 |
def _setup_providers(self) -> None:
|
85 |
+
"""Configure available providers based on dependency availability."""
|
86 |
self.available_providers: List[str] = []
|
87 |
for provider, config in self.PROVIDER_CONFIG.items():
|
88 |
required_lib = config.get("requires_library")
|
89 |
if required_lib and not globals().get(required_lib.title()):
|
90 |
+
logger.warning(f"Skipping provider {provider} due to missing dependency: {required_lib}")
|
91 |
+
continue
|
92 |
self.available_providers.append(provider)
|
93 |
|
94 |
def _setup_input_handlers(self) -> None:
|
95 |
+
"""Register input processors."""
|
96 |
+
self.input_processors: Dict[str, Callable[[Any], str]] = {
|
97 |
"text": self._process_text,
|
98 |
"pdf": self._process_pdf,
|
99 |
"csv": self._process_csv,
|
|
|
105 |
@lru_cache(maxsize=100)
|
106 |
def generate(self, provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
107 |
"""
|
108 |
+
Unified generation endpoint with caching and failover support.
|
|
|
109 |
"""
|
110 |
try:
|
111 |
if provider not in self.available_providers:
|
112 |
+
raise ProviderClientError(f"Provider {provider} is not available.")
|
|
|
113 |
client = self._get_client(provider)
|
114 |
if not client:
|
115 |
+
raise ProviderClientError(f"Client initialization failed for provider {provider}.")
|
|
|
116 |
return self._execute_generation(client, provider, model, prompt)
|
|
|
117 |
except Exception as e:
|
118 |
+
self._log_error(f"Generation error using provider '{provider}': {e}")
|
119 |
return self._failover_generation(provider, model, prompt)
|
120 |
|
121 |
def _get_client(self, provider: str) -> Any:
    """Initialize and return a client for the specified provider.

    Args:
        provider: Key into ``self.PROVIDER_CONFIG`` and into the
            ``"api_keys"`` mapping held in ``st.session_state``.

    Returns:
        ``groq.Groq(...)`` for "Groq"; a dict carrying a Bearer
        ``Authorization`` header for "HuggingFace"; otherwise an ``OpenAI``
        client built from the provider's ``base_url`` with a 30 second timeout.

    Raises:
        ProviderClientError: If no API key is stored for the provider, or if
            constructing the client fails. In the latter case the original
            exception is attached as ``__cause__``.
    """
    config = self.PROVIDER_CONFIG[provider]
    api_key = st.session_state["api_keys"].get(provider, "")
    if not api_key:
        raise ProviderClientError(f"Missing API key for {provider}.")

    try:
        if provider == "Groq":
            return groq.Groq(api_key=api_key)
        if provider == "HuggingFace":
            return {"headers": {"Authorization": f"Bearer {api_key}"}}
        return OpenAI(
            base_url=config["base_url"],
            api_key=api_key,
            timeout=30,
        )
    except Exception as e:
        self._log_error(f"Error initializing client for {provider}: {e}")
        # Chain the cause so the underlying failure stays in the traceback.
        raise ProviderClientError(f"Client init error for {provider}") from e
|
144 |
|
145 |
def _execute_generation(self, client: Any, provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
146 |
"""
|
147 |
+
Execute the generation request for the given provider.
|
148 |
+
Updates system metrics and returns the result.
|
149 |
"""
|
150 |
st.session_state["system_metrics"]["api_calls"] += 1
|
151 |
|
152 |
if provider == "HuggingFace":
|
153 |
url = self.PROVIDER_CONFIG[provider]["base_url"] + model
|
154 |
+
response = requests.post(url, headers=client["headers"], json={"inputs": prompt}, timeout=30)
|
|
|
|
|
|
|
|
|
|
|
155 |
response.raise_for_status()
|
156 |
return response.json()
|
157 |
else:
|
|
|
159 |
model=model,
|
160 |
messages=[{"role": "user", "content": prompt}],
|
161 |
temperature=0.1,
|
162 |
+
max_tokens=2000,
|
163 |
)
|
|
|
164 |
if hasattr(completion.usage, "total_tokens"):
|
165 |
st.session_state["system_metrics"]["tokens_used"] += completion.usage.total_tokens
|
166 |
try:
|
167 |
+
return json.loads(completion.choices[0].message.content)
|
168 |
+
except Exception:
|
169 |
+
return {"response": completion.choices[0].message.content}
|
|
|
170 |
|
171 |
def _failover_generation(self, original_provider: str, model: str, prompt: str) -> Dict[str, Any]:
|
172 |
"""
|
173 |
+
Attempt to generate synthetic data using alternative providers.
|
|
|
174 |
"""
|
175 |
for backup_provider in self.available_providers:
|
176 |
if backup_provider == original_provider:
|
177 |
continue
|
|
|
178 |
backup_models = self.PROVIDER_CONFIG[backup_provider]["models"]
|
179 |
backup_model = model if model in backup_models else backup_models[0]
|
180 |
try:
|
181 |
st.session_state["active_provider"] = backup_provider
|
182 |
result = self.generate(backup_provider, backup_model, prompt)
|
183 |
+
self._log_error(f"Failover succeeded: provider '{backup_provider}' with model '{backup_model}'")
|
|
|
184 |
return result
|
185 |
except Exception as e:
|
186 |
+
self._log_error(f"Failover attempt with {backup_provider} failed: {e}")
|
187 |
+
raise ProviderClientError("All generation providers failed.")
|
|
|
188 |
|
189 |
# --- Input Processors ---
|
190 |
def _process_text(self, text: str) -> str:
|
191 |
+
"""Strip and return plain text input."""
|
192 |
return text.strip()
|
193 |
|
194 |
def _process_pdf(self, file) -> str:
|
195 |
+
"""Extract and return text from a PDF file."""
|
196 |
try:
|
197 |
with pdfplumber.open(file) as pdf:
|
198 |
+
return "\n".join((page.extract_text() or "") for page in pdf.pages)
|
199 |
except Exception as e:
|
200 |
+
self._log_error(f"PDF processing error: {e}")
|
201 |
return ""
|
202 |
|
203 |
def _process_csv(self, file) -> str:
|
204 |
+
"""Convert CSV file to string via DataFrame conversion."""
|
205 |
try:
|
206 |
df = pd.read_csv(file)
|
207 |
return df.to_csv(index=False)
|
208 |
except Exception as e:
|
209 |
+
self._log_error(f"CSV processing error: {e}")
|
210 |
return ""
|
211 |
|
212 |
def _process_api(self, api_url: str) -> str:
|
213 |
+
"""Fetch and return JSON data from the provided API URL."""
|
214 |
try:
|
215 |
response = requests.get(api_url, timeout=10)
|
216 |
response.raise_for_status()
|
217 |
+
return json.dumps(response.json(), indent=2)
|
|
|
218 |
except Exception as e:
|
219 |
+
self._log_error(f"API processing error: {e}")
|
220 |
return ""
|
221 |
|
222 |
def _process_database(self, config: Dict[str, str]) -> str:
|
223 |
"""
|
224 |
+
Execute a database query using a provided configuration.
|
225 |
+
Expects a dict with 'connection_string' and 'query' keys.
|
226 |
"""
|
227 |
try:
|
228 |
connection_string = config.get("connection_string", "")
|
|
|
231 |
raise ValueError("Missing connection string or query.")
|
232 |
engine = sqlalchemy.create_engine(connection_string)
|
233 |
with engine.connect() as connection:
|
234 |
+
df = pd.read_sql(query, connection)
|
235 |
+
return df.to_csv(index=False)
|
236 |
except Exception as e:
|
237 |
+
self._log_error(f"Database processing error: {e}")
|
238 |
return ""
|
239 |
|
240 |
def _process_web(self, url: str) -> str:
|
241 |
+
"""Fetch and return webpage content using anti-bot headers."""
|
242 |
try:
|
243 |
+
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (SyntheticBot/1.0)"}, timeout=10)
|
|
|
|
|
244 |
response.raise_for_status()
|
245 |
return response.text
|
246 |
except Exception as e:
|
247 |
+
self._log_error(f"Web extraction error: {e}")
|
248 |
return ""
|
249 |
|
250 |
+
# --- Logging & Diagnostics ---
|
251 |
def _log_error(self, message: str) -> None:
    """Record an error in session state and in the module logger.

    Increments ``system_metrics["error_count"]``, appends ``message`` to
    ``error_logs``, emits it at ERROR level, and also shows it in the
    Streamlit UI when ``debug_mode`` is truthy.
    """
    state = st.session_state
    state["system_metrics"]["error_count"] += 1
    state["error_logs"].append(message)
    logger.error(message)
    if not state.get("debug_mode"):
        return
    st.error(f"[DEBUG] {message}")
|
258 |
|
259 |
def health_check(self) -> Dict[str, Any]:
    """Build a diagnostics report.

    Returns:
        A dict with the available provider list, the result of
        ``_test_provider_connectivity`` for each of them, and the
        ``system_metrics`` entry from ``st.session_state``.
    """
    api_status = {}
    for name in self.available_providers:
        api_status[name] = self._test_provider_connectivity(name)

    report = {
        "providers_available": self.available_providers,
        "api_connectivity": api_status,
        "system_metrics": st.session_state["system_metrics"],
    }
    return report
|
268 |
|
269 |
def _test_provider_connectivity(self, provider: str) -> bool:
|
270 |
+
"""Test connectivity for a given provider."""
|
271 |
try:
|
272 |
client = self._get_client(provider)
|
273 |
if provider == "HuggingFace":
|
|
|
275 |
response = requests.get(url, headers=client["headers"], timeout=5)
|
276 |
return response.status_code == 200
|
277 |
else:
|
|
|
278 |
client.models.list()
|
279 |
return True
|
280 |
+
except Exception as e:
|
281 |
+
self._log_error(f"Connectivity test failed for {provider}: {e}")
|
282 |
return False
|
283 |
|
284 |
|
285 |
+
# --- Streamlit UI Components ---
|
286 |
+
def provider_config_ui(generator: SyntheticDataGenerator) -> None:
|
287 |
+
"""Provider configuration and health check UI."""
|
288 |
with st.sidebar:
|
289 |
st.header("⚙️ AI Engine Configuration")
|
290 |
provider = st.selectbox(
|
291 |
"AI Provider",
|
292 |
+
generator.available_providers,
|
293 |
+
index=generator.available_providers.index(st.session_state.get("active_provider", "OpenAI")),
|
294 |
+
help="Select your preferred AI provider."
|
295 |
)
|
296 |
st.session_state["active_provider"] = provider
|
297 |
|
|
|
299 |
f"{provider} API Key",
|
300 |
type="password",
|
301 |
value=st.session_state["api_keys"].get(provider, ""),
|
302 |
+
help=f"Enter your API key for {provider}."
|
303 |
)
|
304 |
st.session_state["api_keys"][provider] = api_key
|
305 |
|
306 |
model = st.selectbox(
|
307 |
"Model",
|
308 |
+
generator.PROVIDER_CONFIG[provider]["models"],
|
309 |
+
help="Select the model to use."
|
310 |
)
|
311 |
|
312 |
if st.button("Run Health Check"):
|
313 |
+
report = generator.health_check()
|
314 |
st.json(report)
|
315 |
|
316 |
|
317 |
def main() -> None:
|
318 |
+
"""Main Streamlit UI entry point."""
|
319 |
+
st.set_page_config(page_title="Synthetic Data Factory Pro", page_icon="🏭", layout="wide")
|
320 |
+
generator = SyntheticDataGenerator()
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
st.title("🏭 Synthetic Data Factory Pro")
|
323 |
st.markdown(
|
|
|
327 |
"""
|
328 |
)
|
329 |
|
330 |
+
provider_config_ui(generator)
|
331 |
|
332 |
+
# --- Input Data Section ---
|
333 |
st.subheader("Input Data")
|
334 |
+
input_type = st.selectbox("Select Input Type", list(generator.input_processors.keys()))
|
335 |
if input_type == "text":
|
336 |
+
user_input = st.text_area("Enter your text:")
|
337 |
elif input_type == "pdf":
|
338 |
user_input = st.file_uploader("Upload a PDF file", type=["pdf"])
|
339 |
elif input_type == "csv":
|
|
|
347 |
|
348 |
processed_input = ""
|
349 |
if st.button("Process Input"):
|
350 |
+
processor = generator.input_processors.get(input_type)
|
351 |
if processor:
|
352 |
+
if input_type in ("pdf", "csv"):
|
353 |
processed_input = processor(user_input)
|
354 |
elif input_type == "database":
|
355 |
try:
|
356 |
+
config = json.loads(user_input)
|
357 |
+
processed_input = processor(config)
|
358 |
+
except Exception as e:
|
359 |
+
st.error("Invalid JSON configuration for database.")
|
360 |
+
processed_input = ""
|
361 |
else:
|
362 |
processed_input = processor(user_input)
|
363 |
st.text_area("Processed Input", value=processed_input, height=200)
|
364 |
|
365 |
+
# --- Data Generation Section ---
|
366 |
st.subheader("Generate Synthetic Data")
|
367 |
+
prompt = st.text_area("Enter your generation prompt:")
|
368 |
if st.button("Generate"):
|
369 |
active_provider = st.session_state.get("active_provider", "OpenAI")
|
370 |
+
model = st.selectbox("Select Generation Model", generator.PROVIDER_CONFIG[active_provider]["models"])
|
371 |
+
try:
|
372 |
+
result = generator.generate(active_provider, model, prompt)
|
373 |
+
st.json(result)
|
374 |
+
except Exception as e:
|
375 |
+
st.error(f"Data generation failed: {e}")
|
376 |
|
377 |
|
378 |
if __name__ == "__main__":
|