mgbam committed on
Commit
3db2361
·
verified ·
1 Parent(s): 4018394

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -121
app.py CHANGED
@@ -1,13 +1,24 @@
1
  import json
 
2
  import requests
3
  import streamlit as st
4
  import pdfplumber
5
  import pandas as pd
6
  import sqlalchemy
7
- from typing import Any, Dict, List, Optional, Union
8
  from functools import lru_cache
9
 
10
- # Provider clients with import guards
 
 
 
 
 
 
 
 
 
 
11
  try:
12
  from openai import OpenAI
13
  except ImportError:
@@ -19,6 +30,13 @@ except ImportError:
19
  groq = None
20
 
21
 
 
 
 
 
 
 
 
22
  class SyntheticDataGenerator:
23
  """World's Most Advanced Synthetic Data Generation System"""
24
 
@@ -46,42 +64,36 @@ class SyntheticDataGenerator:
46
  }
47
 
48
  def __init__(self) -> None:
49
- """Initialize session state, input handlers, and providers."""
50
  self._init_session_state()
51
- self._setup_input_handlers()
52
  self._setup_providers()
 
53
 
54
  def _init_session_state(self) -> None:
55
- """Initialize enterprise-grade session management with default values."""
56
  defaults = {
57
  "active_provider": "OpenAI",
58
  "api_keys": {},
59
- "input_sources": [],
60
- "generation_results": [],
61
- "system_metrics": {
62
- "api_calls": 0,
63
- "tokens_used": 0,
64
- "error_count": 0,
65
- },
66
  "error_logs": [],
67
  "debug_mode": False,
68
  }
69
- for key, val in defaults.items():
70
  if key not in st.session_state:
71
- st.session_state[key] = val
72
 
73
  def _setup_providers(self) -> None:
74
- """Configure available providers with health checks."""
75
  self.available_providers: List[str] = []
76
  for provider, config in self.PROVIDER_CONFIG.items():
77
  required_lib = config.get("requires_library")
78
  if required_lib and not globals().get(required_lib.title()):
79
- continue # Skip providers with missing dependencies
 
80
  self.available_providers.append(provider)
81
 
82
  def _setup_input_handlers(self) -> None:
83
- """Register enterprise input processors."""
84
- self.input_processors = {
85
  "text": self._process_text,
86
  "pdf": self._process_pdf,
87
  "csv": self._process_csv,
@@ -93,65 +105,53 @@ class SyntheticDataGenerator:
93
  @lru_cache(maxsize=100)
94
  def generate(self, provider: str, model: str, prompt: str) -> Dict[str, Any]:
95
  """
96
- Unified generation endpoint with failover support.
97
- Caches results to improve performance.
98
  """
99
  try:
100
  if provider not in self.available_providers:
101
- raise ValueError(f"Provider {provider} is not available.")
102
-
103
  client = self._get_client(provider)
104
  if not client:
105
- raise ConnectionError("Client initialization failed.")
106
-
107
  return self._execute_generation(client, provider, model, prompt)
108
-
109
  except Exception as e:
110
- self._log_error(f"Generation Error with provider '{provider}': {str(e)}")
111
  return self._failover_generation(provider, model, prompt)
112
 
113
  def _get_client(self, provider: str) -> Any:
114
  """
115
- Secure client initialization with connection pooling.
116
- Raises ValueError if API key is missing.
117
  """
118
  config = self.PROVIDER_CONFIG[provider]
119
  api_key = st.session_state["api_keys"].get(provider, "")
120
-
121
  if not api_key:
122
- raise ValueError(f"API key required for provider {provider}.")
123
-
124
  try:
125
  if provider == "Groq":
126
  return groq.Groq(api_key=api_key)
127
  elif provider == "HuggingFace":
128
  return {"headers": {"Authorization": f"Bearer {api_key}"}}
129
  else:
130
- # Initialize OpenAI client for OpenAI and Deepseek
131
  return OpenAI(
132
  base_url=config["base_url"],
133
  api_key=api_key,
134
- timeout=30
135
  )
136
  except Exception as e:
137
- self._log_error(f"Client Initialization Failed for {provider}: {str(e)}")
138
- return None
139
 
140
  def _execute_generation(self, client: Any, provider: str, model: str, prompt: str) -> Dict[str, Any]:
141
  """
142
- Execute provider-specific generation with circuit breaker.
143
- Increments API call and token usage metrics.
144
  """
145
  st.session_state["system_metrics"]["api_calls"] += 1
146
 
147
  if provider == "HuggingFace":
148
  url = self.PROVIDER_CONFIG[provider]["base_url"] + model
149
- response = requests.post(
150
- url,
151
- headers=client["headers"],
152
- json={"inputs": prompt},
153
- timeout=30
154
- )
155
  response.raise_for_status()
156
  return response.json()
157
  else:
@@ -159,77 +159,70 @@ class SyntheticDataGenerator:
159
  model=model,
160
  messages=[{"role": "user", "content": prompt}],
161
  temperature=0.1,
162
- max_tokens=2000
163
  )
164
- # Update token usage if available
165
  if hasattr(completion.usage, "total_tokens"):
166
  st.session_state["system_metrics"]["tokens_used"] += completion.usage.total_tokens
167
  try:
168
- result = json.loads(completion.choices[0].message.content)
169
- except json.JSONDecodeError:
170
- result = {"response": completion.choices[0].message.content}
171
- return result
172
 
173
  def _failover_generation(self, original_provider: str, model: str, prompt: str) -> Dict[str, Any]:
174
  """
175
- Enterprise failover to secondary providers.
176
- Tries available providers (excluding the original) until one succeeds.
177
  """
178
  for backup_provider in self.available_providers:
179
  if backup_provider == original_provider:
180
  continue
181
- # Determine model to use: if the desired model is available, use it; otherwise use the first available model.
182
  backup_models = self.PROVIDER_CONFIG[backup_provider]["models"]
183
  backup_model = model if model in backup_models else backup_models[0]
184
  try:
185
  st.session_state["active_provider"] = backup_provider
186
  result = self.generate(backup_provider, backup_model, prompt)
187
- # Optionally, log the failover event
188
- self._log_error(f"Failover succeeded with provider '{backup_provider}' using model '{backup_model}'.")
189
  return result
190
  except Exception as e:
191
- self._log_error(f"Failover attempt with provider '{backup_provider}' failed: {str(e)}")
192
- continue
193
- raise RuntimeError("All generation providers are unavailable.")
194
 
195
  # --- Input Processors ---
196
  def _process_text(self, text: str) -> str:
197
- """Process plain text input by stripping unnecessary whitespace."""
198
  return text.strip()
199
 
200
  def _process_pdf(self, file) -> str:
201
- """Advanced PDF processing with OCR fallback."""
202
  try:
203
  with pdfplumber.open(file) as pdf:
204
- return "\n".join(page.extract_text() or "" for page in pdf.pages)
205
  except Exception as e:
206
- self._log_error(f"PDF Processing Error: {str(e)}")
207
  return ""
208
 
209
  def _process_csv(self, file) -> str:
210
- """Process CSV input by reading it into a DataFrame and converting to CSV string."""
211
  try:
212
  df = pd.read_csv(file)
213
  return df.to_csv(index=False)
214
  except Exception as e:
215
- self._log_error(f"CSV Processing Error: {str(e)}")
216
  return ""
217
 
218
  def _process_api(self, api_url: str) -> str:
219
- """Process API input by fetching JSON data from an endpoint."""
220
  try:
221
  response = requests.get(api_url, timeout=10)
222
  response.raise_for_status()
223
- data = response.json()
224
- return json.dumps(data, indent=2)
225
  except Exception as e:
226
- self._log_error(f"API Processing Error: {str(e)}")
227
  return ""
228
 
229
  def _process_database(self, config: Dict[str, str]) -> str:
230
  """
231
- Process database input by executing a query.
232
- Expects a configuration dictionary with 'connection_string' and 'query' keys.
233
  """
234
  try:
235
  connection_string = config.get("connection_string", "")
@@ -238,45 +231,43 @@ class SyntheticDataGenerator:
238
  raise ValueError("Missing connection string or query.")
239
  engine = sqlalchemy.create_engine(connection_string)
240
  with engine.connect() as connection:
241
- result = pd.read_sql(query, connection)
242
- return result.to_csv(index=False)
243
  except Exception as e:
244
- self._log_error(f"Database Processing Error: {str(e)}")
245
  return ""
246
 
247
  def _process_web(self, url: str) -> str:
248
- """Web content extraction with anti-bot measures."""
249
  try:
250
- response = requests.get(url, headers={
251
- "User-Agent": "Mozilla/5.0 (compatible; SyntheticBot/1.0)"
252
- }, timeout=10)
253
  response.raise_for_status()
254
  return response.text
255
  except Exception as e:
256
- self._log_error(f"Web Extraction Error: {str(e)}")
257
  return ""
258
 
259
- # --- Enterprise Features ---
260
  def _log_error(self, message: str) -> None:
261
- """Centralized error logging with telemetry."""
262
  st.session_state["system_metrics"]["error_count"] += 1
263
  st.session_state["error_logs"].append(message)
 
264
  if st.session_state.get("debug_mode"):
265
  st.error(f"[DEBUG] {message}")
266
 
267
  def health_check(self) -> Dict[str, Any]:
268
- """Comprehensive system diagnostics."""
 
 
269
  return {
270
  "providers_available": self.available_providers,
271
- "api_connectivity": {
272
- provider: self._test_provider_connectivity(provider)
273
- for provider in self.available_providers
274
- },
275
  "system_metrics": st.session_state["system_metrics"],
276
  }
277
 
278
  def _test_provider_connectivity(self, provider: str) -> bool:
279
- """Provider-specific connectivity test."""
280
  try:
281
  client = self._get_client(provider)
282
  if provider == "HuggingFace":
@@ -284,23 +275,23 @@ class SyntheticDataGenerator:
284
  response = requests.get(url, headers=client["headers"], timeout=5)
285
  return response.status_code == 200
286
  else:
287
- # Assuming client has a models.list() method for other providers.
288
  client.models.list()
289
  return True
290
- except Exception:
 
291
  return False
292
 
293
 
294
- # --- Enterprise UI Components ---
295
- def provider_config_ui(gen: SyntheticDataGenerator) -> None:
296
- """Advanced provider configuration interface."""
297
  with st.sidebar:
298
  st.header("⚙️ AI Engine Configuration")
299
  provider = st.selectbox(
300
  "AI Provider",
301
- gen.available_providers,
302
- help="Available providers based on system configuration",
303
- index=gen.available_providers.index(st.session_state.get("active_provider", "OpenAI"))
304
  )
305
  st.session_state["active_provider"] = provider
306
 
@@ -308,30 +299,25 @@ def provider_config_ui(gen: SyntheticDataGenerator) -> None:
308
  f"{provider} API Key",
309
  type="password",
310
  value=st.session_state["api_keys"].get(provider, ""),
311
- help=f"Obtain API key from {provider}'s portal"
312
  )
313
  st.session_state["api_keys"][provider] = api_key
314
 
315
  model = st.selectbox(
316
  "Model",
317
- gen.PROVIDER_CONFIG[provider]["models"],
318
- help="Select model version based on your API plan"
319
  )
320
 
321
  if st.button("Run Health Check"):
322
- report = gen.health_check()
323
  st.json(report)
324
 
325
 
326
  def main() -> None:
327
- """Enterprise-grade user interface."""
328
- st.set_page_config(
329
- page_title="Synthetic Data Factory Pro",
330
- page_icon="🏭",
331
- layout="wide"
332
- )
333
-
334
- gen = SyntheticDataGenerator()
335
 
336
  st.title("🏭 Synthetic Data Factory Pro")
337
  st.markdown(
@@ -341,13 +327,13 @@ def main() -> None:
341
  """
342
  )
343
 
344
- provider_config_ui(gen)
345
 
346
- # Example: Input management and processing
347
  st.subheader("Input Data")
348
- input_type = st.selectbox("Select Input Type", list(gen.input_processors.keys()))
349
  if input_type == "text":
350
- user_input = st.text_area("Enter your text here:")
351
  elif input_type == "pdf":
352
  user_input = st.file_uploader("Upload a PDF file", type=["pdf"])
353
  elif input_type == "csv":
@@ -361,29 +347,32 @@ def main() -> None:
361
 
362
  processed_input = ""
363
  if st.button("Process Input"):
364
- processor = gen.input_processors.get(input_type)
365
  if processor:
366
- if input_type in ["pdf", "csv"]:
367
  processed_input = processor(user_input)
368
  elif input_type == "database":
369
  try:
370
- db_config = json.loads(user_input)
371
- processed_input = processor(db_config)
372
- except json.JSONDecodeError:
373
- st.error("Invalid JSON for database configuration.")
 
374
  else:
375
  processed_input = processor(user_input)
376
  st.text_area("Processed Input", value=processed_input, height=200)
377
 
378
- # Generation section
379
  st.subheader("Generate Synthetic Data")
380
- prompt = st.text_area("Enter your prompt for data generation:")
381
  if st.button("Generate"):
382
  active_provider = st.session_state.get("active_provider", "OpenAI")
383
- # Allow model selection for the generation step
384
- model = st.selectbox("Select Generation Model", gen.PROVIDER_CONFIG[active_provider]["models"])
385
- result = gen.generate(active_provider, model, prompt)
386
- st.json(result)
 
 
387
 
388
 
389
  if __name__ == "__main__":
 
1
  import json
2
+ import logging
3
  import requests
4
  import streamlit as st
5
  import pdfplumber
6
  import pandas as pd
7
  import sqlalchemy
8
+ from typing import Any, Dict, List, Optional, Union, Callable
9
  from functools import lru_cache
10
 
11
# --- Logging Configuration ---
# One named logger for the whole module, emitting everything from DEBUG up.
logger = logging.getLogger("SyntheticDataGenerator")
logger.setLevel(logging.DEBUG)

# Attach the stream handler only when the logger has none yet, so executing
# this module again does not stack a second handler on the same logger.
if not logger.handlers:
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    ch.setLevel(logging.DEBUG)
    logger.addHandler(ch)
20
+
21
+ # --- Provider Clients with Import Guards ---
22
  try:
23
  from openai import OpenAI
24
  except ImportError:
 
30
  groq = None
31
 
32
 
33
+ # --- Custom Exceptions ---
34
class ProviderClientError(Exception):
    """Raised when a provider client is unavailable or cannot be created."""
37
+
38
+
39
+ # --- Core Synthetic Data Generator ---
40
  class SyntheticDataGenerator:
41
  """World's Most Advanced Synthetic Data Generation System"""
42
 
 
64
  }
65
 
66
  def __init__(self) -> None:
 
67
  self._init_session_state()
 
68
  self._setup_providers()
69
+ self._setup_input_handlers()
70
 
71
  def _init_session_state(self) -> None:
72
+ """Initialize session state with default values."""
73
  defaults = {
74
  "active_provider": "OpenAI",
75
  "api_keys": {},
76
+ "system_metrics": {"api_calls": 0, "tokens_used": 0, "error_count": 0},
 
 
 
 
 
 
77
  "error_logs": [],
78
  "debug_mode": False,
79
  }
80
+ for key, value in defaults.items():
81
  if key not in st.session_state:
82
+ st.session_state[key] = value
83
 
84
  def _setup_providers(self) -> None:
85
+ """Configure available providers based on dependency availability."""
86
  self.available_providers: List[str] = []
87
  for provider, config in self.PROVIDER_CONFIG.items():
88
  required_lib = config.get("requires_library")
89
  if required_lib and not globals().get(required_lib.title()):
90
+ logger.warning(f"Skipping provider {provider} due to missing dependency: {required_lib}")
91
+ continue
92
  self.available_providers.append(provider)
93
 
94
  def _setup_input_handlers(self) -> None:
95
+ """Register input processors."""
96
+ self.input_processors: Dict[str, Callable[[Any], str]] = {
97
  "text": self._process_text,
98
  "pdf": self._process_pdf,
99
  "csv": self._process_csv,
 
105
  @lru_cache(maxsize=100)
106
  def generate(self, provider: str, model: str, prompt: str) -> Dict[str, Any]:
107
  """
108
+ Unified generation endpoint with caching and failover support.
 
109
  """
110
  try:
111
  if provider not in self.available_providers:
112
+ raise ProviderClientError(f"Provider {provider} is not available.")
 
113
  client = self._get_client(provider)
114
  if not client:
115
+ raise ProviderClientError(f"Client initialization failed for provider {provider}.")
 
116
  return self._execute_generation(client, provider, model, prompt)
 
117
  except Exception as e:
118
+ self._log_error(f"Generation error using provider '{provider}': {e}")
119
  return self._failover_generation(provider, model, prompt)
120
 
121
  def _get_client(self, provider: str) -> Any:
122
  """
123
+ Initialize and return a client for the specified provider.
124
+ Raises ProviderClientError if API key or dependency issues occur.
125
  """
126
  config = self.PROVIDER_CONFIG[provider]
127
  api_key = st.session_state["api_keys"].get(provider, "")
 
128
  if not api_key:
129
+ raise ProviderClientError(f"Missing API key for {provider}.")
 
130
  try:
131
  if provider == "Groq":
132
  return groq.Groq(api_key=api_key)
133
  elif provider == "HuggingFace":
134
  return {"headers": {"Authorization": f"Bearer {api_key}"}}
135
  else:
 
136
  return OpenAI(
137
  base_url=config["base_url"],
138
  api_key=api_key,
139
+ timeout=30,
140
  )
141
  except Exception as e:
142
+ self._log_error(f"Error initializing client for {provider}: {e}")
143
+ raise ProviderClientError(f"Client init error for {provider}")
144
 
145
  def _execute_generation(self, client: Any, provider: str, model: str, prompt: str) -> Dict[str, Any]:
146
  """
147
+ Execute the generation request for the given provider.
148
+ Updates system metrics and returns the result.
149
  """
150
  st.session_state["system_metrics"]["api_calls"] += 1
151
 
152
  if provider == "HuggingFace":
153
  url = self.PROVIDER_CONFIG[provider]["base_url"] + model
154
+ response = requests.post(url, headers=client["headers"], json={"inputs": prompt}, timeout=30)
 
 
 
 
 
155
  response.raise_for_status()
156
  return response.json()
157
  else:
 
159
  model=model,
160
  messages=[{"role": "user", "content": prompt}],
161
  temperature=0.1,
162
+ max_tokens=2000,
163
  )
 
164
  if hasattr(completion.usage, "total_tokens"):
165
  st.session_state["system_metrics"]["tokens_used"] += completion.usage.total_tokens
166
  try:
167
+ return json.loads(completion.choices[0].message.content)
168
+ except Exception:
169
+ return {"response": completion.choices[0].message.content}
 
170
 
171
  def _failover_generation(self, original_provider: str, model: str, prompt: str) -> Dict[str, Any]:
172
  """
173
+ Attempt to generate synthetic data using alternative providers.
 
174
  """
175
  for backup_provider in self.available_providers:
176
  if backup_provider == original_provider:
177
  continue
 
178
  backup_models = self.PROVIDER_CONFIG[backup_provider]["models"]
179
  backup_model = model if model in backup_models else backup_models[0]
180
  try:
181
  st.session_state["active_provider"] = backup_provider
182
  result = self.generate(backup_provider, backup_model, prompt)
183
+ self._log_error(f"Failover succeeded: provider '{backup_provider}' with model '{backup_model}'")
 
184
  return result
185
  except Exception as e:
186
+ self._log_error(f"Failover attempt with {backup_provider} failed: {e}")
187
+ raise ProviderClientError("All generation providers failed.")
 
188
 
189
  # --- Input Processors ---
190
  def _process_text(self, text: str) -> str:
191
+ """Strip and return plain text input."""
192
  return text.strip()
193
 
194
  def _process_pdf(self, file) -> str:
195
+ """Extract and return text from a PDF file."""
196
  try:
197
  with pdfplumber.open(file) as pdf:
198
+ return "\n".join((page.extract_text() or "") for page in pdf.pages)
199
  except Exception as e:
200
+ self._log_error(f"PDF processing error: {e}")
201
  return ""
202
 
203
  def _process_csv(self, file) -> str:
204
+ """Convert CSV file to string via DataFrame conversion."""
205
  try:
206
  df = pd.read_csv(file)
207
  return df.to_csv(index=False)
208
  except Exception as e:
209
+ self._log_error(f"CSV processing error: {e}")
210
  return ""
211
 
212
  def _process_api(self, api_url: str) -> str:
213
+ """Fetch and return JSON data from the provided API URL."""
214
  try:
215
  response = requests.get(api_url, timeout=10)
216
  response.raise_for_status()
217
+ return json.dumps(response.json(), indent=2)
 
218
  except Exception as e:
219
+ self._log_error(f"API processing error: {e}")
220
  return ""
221
 
222
  def _process_database(self, config: Dict[str, str]) -> str:
223
  """
224
+ Execute a database query using a provided configuration.
225
+ Expects a dict with 'connection_string' and 'query' keys.
226
  """
227
  try:
228
  connection_string = config.get("connection_string", "")
 
231
  raise ValueError("Missing connection string or query.")
232
  engine = sqlalchemy.create_engine(connection_string)
233
  with engine.connect() as connection:
234
+ df = pd.read_sql(query, connection)
235
+ return df.to_csv(index=False)
236
  except Exception as e:
237
+ self._log_error(f"Database processing error: {e}")
238
  return ""
239
 
240
  def _process_web(self, url: str) -> str:
241
+ """Fetch and return webpage content using anti-bot headers."""
242
  try:
243
+ response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (SyntheticBot/1.0)"}, timeout=10)
 
 
244
  response.raise_for_status()
245
  return response.text
246
  except Exception as e:
247
+ self._log_error(f"Web extraction error: {e}")
248
  return ""
249
 
250
+ # --- Logging & Diagnostics ---
251
  def _log_error(self, message: str) -> None:
252
+ """Log errors centrally and update system metrics."""
253
  st.session_state["system_metrics"]["error_count"] += 1
254
  st.session_state["error_logs"].append(message)
255
+ logger.error(message)
256
  if st.session_state.get("debug_mode"):
257
  st.error(f"[DEBUG] {message}")
258
 
259
  def health_check(self) -> Dict[str, Any]:
260
+ """Return diagnostics including provider connectivity and system metrics."""
261
+ connectivity = {provider: self._test_provider_connectivity(provider)
262
+ for provider in self.available_providers}
263
  return {
264
  "providers_available": self.available_providers,
265
+ "api_connectivity": connectivity,
 
 
 
266
  "system_metrics": st.session_state["system_metrics"],
267
  }
268
 
269
  def _test_provider_connectivity(self, provider: str) -> bool:
270
+ """Test connectivity for a given provider."""
271
  try:
272
  client = self._get_client(provider)
273
  if provider == "HuggingFace":
 
275
  response = requests.get(url, headers=client["headers"], timeout=5)
276
  return response.status_code == 200
277
  else:
 
278
  client.models.list()
279
  return True
280
+ except Exception as e:
281
+ self._log_error(f"Connectivity test failed for {provider}: {e}")
282
  return False
283
 
284
 
285
+ # --- Streamlit UI Components ---
286
+ def provider_config_ui(generator: SyntheticDataGenerator) -> None:
287
+ """Provider configuration and health check UI."""
288
  with st.sidebar:
289
  st.header("⚙️ AI Engine Configuration")
290
  provider = st.selectbox(
291
  "AI Provider",
292
+ generator.available_providers,
293
+ index=generator.available_providers.index(st.session_state.get("active_provider", "OpenAI")),
294
+ help="Select your preferred AI provider."
295
  )
296
  st.session_state["active_provider"] = provider
297
 
 
299
  f"{provider} API Key",
300
  type="password",
301
  value=st.session_state["api_keys"].get(provider, ""),
302
+ help=f"Enter your API key for {provider}."
303
  )
304
  st.session_state["api_keys"][provider] = api_key
305
 
306
  model = st.selectbox(
307
  "Model",
308
+ generator.PROVIDER_CONFIG[provider]["models"],
309
+ help="Select the model to use."
310
  )
311
 
312
  if st.button("Run Health Check"):
313
+ report = generator.health_check()
314
  st.json(report)
315
 
316
 
317
  def main() -> None:
318
+ """Main Streamlit UI entry point."""
319
+ st.set_page_config(page_title="Synthetic Data Factory Pro", page_icon="🏭", layout="wide")
320
+ generator = SyntheticDataGenerator()
 
 
 
 
 
321
 
322
  st.title("🏭 Synthetic Data Factory Pro")
323
  st.markdown(
 
327
  """
328
  )
329
 
330
+ provider_config_ui(generator)
331
 
332
+ # --- Input Data Section ---
333
  st.subheader("Input Data")
334
+ input_type = st.selectbox("Select Input Type", list(generator.input_processors.keys()))
335
  if input_type == "text":
336
+ user_input = st.text_area("Enter your text:")
337
  elif input_type == "pdf":
338
  user_input = st.file_uploader("Upload a PDF file", type=["pdf"])
339
  elif input_type == "csv":
 
347
 
348
  processed_input = ""
349
  if st.button("Process Input"):
350
+ processor = generator.input_processors.get(input_type)
351
  if processor:
352
+ if input_type in ("pdf", "csv"):
353
  processed_input = processor(user_input)
354
  elif input_type == "database":
355
  try:
356
+ config = json.loads(user_input)
357
+ processed_input = processor(config)
358
+ except Exception as e:
359
+ st.error("Invalid JSON configuration for database.")
360
+ processed_input = ""
361
  else:
362
  processed_input = processor(user_input)
363
  st.text_area("Processed Input", value=processed_input, height=200)
364
 
365
+ # --- Data Generation Section ---
366
  st.subheader("Generate Synthetic Data")
367
+ prompt = st.text_area("Enter your generation prompt:")
368
  if st.button("Generate"):
369
  active_provider = st.session_state.get("active_provider", "OpenAI")
370
+ model = st.selectbox("Select Generation Model", generator.PROVIDER_CONFIG[active_provider]["models"])
371
+ try:
372
+ result = generator.generate(active_provider, model, prompt)
373
+ st.json(result)
374
+ except Exception as e:
375
+ st.error(f"Data generation failed: {e}")
376
 
377
 
378
  if __name__ == "__main__":