Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,6 @@ import streamlit as st
|
|
4 |
import pdfplumber
|
5 |
import pandas as pd
|
6 |
import sqlalchemy
|
7 |
-
import time
|
8 |
-
import concurrent.futures
|
9 |
from typing import Any, Dict, List
|
10 |
|
11 |
# Provider clients (ensure these are installed if you plan to use them)
|
@@ -19,11 +17,12 @@ try:
|
|
19 |
except ImportError:
|
20 |
groq = None
|
21 |
|
22 |
-
# Hugging Face
|
23 |
HF_API_URL = "https://api-inference.huggingface.co/models/"
|
24 |
DEFAULT_TEMPERATURE = 0.1
|
25 |
GROQ_MODEL = "mixtral-8x7b-32768"
|
26 |
|
|
|
27 |
class AdvancedSyntheticDataGenerator:
|
28 |
"""
|
29 |
Advanced Synthetic Data Generator that supports multiple input types,
|
@@ -190,8 +189,7 @@ class AdvancedSyntheticDataGenerator:
|
|
190 |
temperature = st.session_state.config["temperature"]
|
191 |
prompt = self.build_prompt()
|
192 |
|
193 |
-
st.info(f"Using
|
194 |
-
|
195 |
try:
|
196 |
if provider_name == "HuggingFace":
|
197 |
response = self._huggingface_inference(client, prompt, model)
|
@@ -216,13 +214,13 @@ class AdvancedSyntheticDataGenerator:
|
|
216 |
Inference for providers using an OpenAI-compatible API.
|
217 |
"""
|
218 |
try:
|
219 |
-
st.write("Sending prompt
|
220 |
result = client.chat.completions.create(
|
221 |
model=model,
|
222 |
messages=[{"role": "user", "content": prompt}],
|
223 |
temperature=temperature,
|
224 |
)
|
225 |
-
st.write("
|
226 |
return result
|
227 |
except Exception as e:
|
228 |
self.log_error(f"Standard Inference Error: {e}")
|
@@ -230,7 +228,7 @@ class AdvancedSyntheticDataGenerator:
|
|
230 |
|
231 |
def _huggingface_inference(self, client: Dict[str, Any], prompt: str, model: str) -> Any:
|
232 |
"""
|
233 |
-
Inference for Hugging Face Inference API.
|
234 |
"""
|
235 |
try:
|
236 |
st.write("Sending prompt to HuggingFace API...")
|
@@ -241,7 +239,7 @@ class AdvancedSyntheticDataGenerator:
|
|
241 |
timeout=30,
|
242 |
)
|
243 |
response.raise_for_status()
|
244 |
-
st.write("
|
245 |
return response.json()
|
246 |
except Exception as e:
|
247 |
self.log_error(f"HuggingFace Inference Error: {e}")
|
@@ -270,6 +268,7 @@ class AdvancedSyntheticDataGenerator:
|
|
270 |
self.log_error(f"Response Parsing Error: {e}")
|
271 |
return ""
|
272 |
|
|
|
273 |
# ===== Advanced UI Components =====
|
274 |
|
275 |
def advanced_config_ui(generator: AdvancedSyntheticDataGenerator):
|
@@ -297,9 +296,10 @@ def advanced_config_ui(generator: AdvancedSyntheticDataGenerator):
|
|
297 |
height=100)
|
298 |
st.session_state.instructions = instructions
|
299 |
|
|
|
300 |
def advanced_input_ui(generator: AdvancedSyntheticDataGenerator):
|
301 |
"""Display input data source options using tabs."""
|
302 |
-
st.
|
303 |
tabs = st.tabs(["Text", "PDF", "CSV", "API", "Database"])
|
304 |
|
305 |
with tabs[0]:
|
@@ -343,9 +343,10 @@ def advanced_input_ui(generator: AdvancedSyntheticDataGenerator):
|
|
343 |
st.session_state.inputs.append(generator.handle_db({"connection": db_conn, "query": db_query}))
|
344 |
st.success("Database input added!")
|
345 |
|
|
|
346 |
def advanced_output_ui(generator: AdvancedSyntheticDataGenerator):
|
347 |
"""Display the generated synthetic data with output options."""
|
348 |
-
st.
|
349 |
if st.session_state.synthetic_data:
|
350 |
output_format = st.session_state.config.get("output_format", "plain_text")
|
351 |
if output_format == "json":
|
@@ -361,8 +362,9 @@ def advanced_output_ui(generator: AdvancedSyntheticDataGenerator):
|
|
361 |
else:
|
362 |
st.info("No synthetic data generated yet.")
|
363 |
|
|
|
364 |
def advanced_logs_ui():
|
365 |
-
"""Display error logs and
|
366 |
with st.expander("Error Logs & Debug Info", expanded=False):
|
367 |
if st.session_state.error_logs:
|
368 |
for log in st.session_state.error_logs:
|
@@ -370,33 +372,52 @@ def advanced_logs_ui():
|
|
370 |
else:
|
371 |
st.write("No logs yet.")
|
372 |
|
|
|
373 |
# ===== Main Application =====
|
374 |
|
375 |
def main() -> None:
|
376 |
st.set_page_config(page_title="Advanced Synthetic Data Generator", layout="wide")
|
|
|
|
|
377 |
generator = AdvancedSyntheticDataGenerator()
|
378 |
advanced_config_ui(generator)
|
379 |
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
advanced_input_ui(generator)
|
385 |
-
if st.button("Clear Inputs"):
|
386 |
st.session_state.inputs = []
|
387 |
-
st.success("
|
388 |
-
|
389 |
-
with
|
390 |
-
|
|
|
|
|
391 |
with st.spinner("Generating synthetic data..."):
|
392 |
if generator.generate_synthetic_data():
|
393 |
-
st.success("
|
394 |
else:
|
395 |
st.error("Data generation failed. Check logs for details.")
|
|
|
|
|
|
|
|
|
396 |
advanced_output_ui(generator)
|
397 |
-
|
398 |
-
|
|
|
|
|
399 |
advanced_logs_ui()
|
400 |
|
|
|
401 |
if __name__ == "__main__":
|
402 |
main()
|
|
|
4 |
import pdfplumber
|
5 |
import pandas as pd
|
6 |
import sqlalchemy
|
|
|
|
|
7 |
from typing import Any, Dict, List
|
8 |
|
9 |
# Provider clients (ensure these are installed if you plan to use them)
|
|
|
17 |
except ImportError:
|
18 |
groq = None
|
19 |
|
20 |
+
# Hugging Face Inference API endpoint
|
21 |
HF_API_URL = "https://api-inference.huggingface.co/models/"
|
22 |
DEFAULT_TEMPERATURE = 0.1
|
23 |
GROQ_MODEL = "mixtral-8x7b-32768"
|
24 |
|
25 |
+
|
26 |
class AdvancedSyntheticDataGenerator:
|
27 |
"""
|
28 |
Advanced Synthetic Data Generator that supports multiple input types,
|
|
|
189 |
temperature = st.session_state.config["temperature"]
|
190 |
prompt = self.build_prompt()
|
191 |
|
192 |
+
st.info(f"Using **{provider_name}** with model **{model}** at temperature **{temperature:.2f}**")
|
|
|
193 |
try:
|
194 |
if provider_name == "HuggingFace":
|
195 |
response = self._huggingface_inference(client, prompt, model)
|
|
|
214 |
Inference for providers using an OpenAI-compatible API.
|
215 |
"""
|
216 |
try:
|
217 |
+
st.write("Sending prompt via standard inference...")
|
218 |
result = client.chat.completions.create(
|
219 |
model=model,
|
220 |
messages=[{"role": "user", "content": prompt}],
|
221 |
temperature=temperature,
|
222 |
)
|
223 |
+
st.write("Standard inference result received.")
|
224 |
return result
|
225 |
except Exception as e:
|
226 |
self.log_error(f"Standard Inference Error: {e}")
|
|
|
228 |
|
229 |
def _huggingface_inference(self, client: Dict[str, Any], prompt: str, model: str) -> Any:
|
230 |
"""
|
231 |
+
Inference for the Hugging Face Inference API.
|
232 |
"""
|
233 |
try:
|
234 |
st.write("Sending prompt to HuggingFace API...")
|
|
|
239 |
timeout=30,
|
240 |
)
|
241 |
response.raise_for_status()
|
242 |
+
st.write("HuggingFace API response received.")
|
243 |
return response.json()
|
244 |
except Exception as e:
|
245 |
self.log_error(f"HuggingFace Inference Error: {e}")
|
|
|
268 |
self.log_error(f"Response Parsing Error: {e}")
|
269 |
return ""
|
270 |
|
271 |
+
|
272 |
# ===== Advanced UI Components =====
|
273 |
|
274 |
def advanced_config_ui(generator: AdvancedSyntheticDataGenerator):
|
|
|
296 |
height=100)
|
297 |
st.session_state.instructions = instructions
|
298 |
|
299 |
+
|
300 |
def advanced_input_ui(generator: AdvancedSyntheticDataGenerator):
|
301 |
"""Display input data source options using tabs."""
|
302 |
+
st.subheader("Add Input Data")
|
303 |
tabs = st.tabs(["Text", "PDF", "CSV", "API", "Database"])
|
304 |
|
305 |
with tabs[0]:
|
|
|
343 |
st.session_state.inputs.append(generator.handle_db({"connection": db_conn, "query": db_query}))
|
344 |
st.success("Database input added!")
|
345 |
|
346 |
+
|
347 |
def advanced_output_ui(generator: AdvancedSyntheticDataGenerator):
|
348 |
"""Display the generated synthetic data with output options."""
|
349 |
+
st.subheader("Synthetic Data Output")
|
350 |
if st.session_state.synthetic_data:
|
351 |
output_format = st.session_state.config.get("output_format", "plain_text")
|
352 |
if output_format == "json":
|
|
|
362 |
else:
|
363 |
st.info("No synthetic data generated yet.")
|
364 |
|
365 |
+
|
366 |
def advanced_logs_ui():
|
367 |
+
"""Display error logs and debug information in an expandable section."""
|
368 |
with st.expander("Error Logs & Debug Info", expanded=False):
|
369 |
if st.session_state.error_logs:
|
370 |
for log in st.session_state.error_logs:
|
|
|
372 |
else:
|
373 |
st.write("No logs yet.")
|
374 |
|
375 |
+
|
376 |
# ===== Main Application =====
|
377 |
|
378 |
def main() -> None:
|
379 |
st.set_page_config(page_title="Advanced Synthetic Data Generator", layout="wide")
|
380 |
+
|
381 |
+
# Sidebar for advanced configuration
|
382 |
generator = AdvancedSyntheticDataGenerator()
|
383 |
advanced_config_ui(generator)
|
384 |
|
385 |
+
st.title("Advanced Synthetic Data Generator")
|
386 |
+
st.markdown(
|
387 |
+
"""
|
388 |
+
Welcome! This application allows you to generate synthetic data from multiple input sources.
|
389 |
+
Use the sections below to add inputs, generate data, view outputs, and review logs.
|
390 |
+
"""
|
391 |
+
)
|
392 |
+
|
393 |
+
# Input Data Section
|
394 |
+
with st.container():
|
395 |
+
st.header("1. Input Data Sources")
|
396 |
advanced_input_ui(generator)
|
397 |
+
if st.button("Clear All Inputs"):
|
398 |
st.session_state.inputs = []
|
399 |
+
st.success("All inputs have been cleared!")
|
400 |
+
|
401 |
+
# Generation Section with a clearly visible button
|
402 |
+
with st.container():
|
403 |
+
st.header("2. Generate Synthetic Data")
|
404 |
+
if st.button("Generate Synthetic Data", key="generate_button"):
|
405 |
with st.spinner("Generating synthetic data..."):
|
406 |
if generator.generate_synthetic_data():
|
407 |
+
st.success("Synthetic data generated successfully!")
|
408 |
else:
|
409 |
st.error("Data generation failed. Check logs for details.")
|
410 |
+
|
411 |
+
# Output Section
|
412 |
+
with st.container():
|
413 |
+
st.header("3. Synthetic Data Output")
|
414 |
advanced_output_ui(generator)
|
415 |
+
|
416 |
+
# Logs Section
|
417 |
+
with st.container():
|
418 |
+
st.header("4. Error Logs & Debug Information")
|
419 |
advanced_logs_ui()
|
420 |
|
421 |
+
|
422 |
if __name__ == "__main__":
|
423 |
main()
|