Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- pages/02_Model_Training.py +193 -0
- pages/03_Code_Generation.py +136 -0
pages/02_Model_Training.py
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import time
|
4 |
+
import threading
|
5 |
+
from data_utils import list_available_datasets, get_dataset_info
|
6 |
+
from model_utils import list_available_huggingface_models
|
7 |
+
from training_utils import (
|
8 |
+
start_model_training,
|
9 |
+
stop_model_training,
|
10 |
+
get_running_training_jobs,
|
11 |
+
simulate_training
|
12 |
+
)
|
13 |
+
from utils import (
|
14 |
+
set_page_config,
|
15 |
+
display_sidebar,
|
16 |
+
add_log,
|
17 |
+
display_logs,
|
18 |
+
plot_training_progress
|
19 |
+
)
|
20 |
+
|
21 |
+
# Page scaffolding: call the project helpers (imported from utils) that set
# the page configuration and display the sidebar before any page content.
set_page_config()
display_sidebar()

# Page heading and a one-line description of what this page is for.
st.title("Model Training")
st.markdown("Configure and train code generation models on your datasets.")

# Two tabs: the first configures and launches a job, the second monitors jobs.
tab1, tab2 = st.tabs(["Configure Training", "Monitor Jobs"])
|
33 |
+
|
34 |
+
with tab1:
    # NOTE(review): indentation was reconstructed from a flattened diff view;
    # confirm the nesting against the real file.
    st.subheader("Train a New Model")

    # Identifier under which the job and its stop event are stored in session state.
    model_id = st.text_input("Model ID", placeholder="e.g., my_codegen_model_v1")

    # Dataset picker. With no datasets the user is warned and dataset_name stays
    # None, which keeps the "Start Training" button disabled further down.
    available_datasets = list_available_datasets()
    if available_datasets:
        dataset_name = st.selectbox("Select Dataset", available_datasets)
    else:
        st.warning("No datasets available. Please upload a dataset in the Dataset Management section.")
        dataset_name = None

    # Base model picker.
    model_options = list_available_huggingface_models()
    base_model = st.selectbox("Select Base Model", model_options)

    # Training parameters, laid out in two columns.
    st.markdown("### Training Parameters")
    left_col, right_col = st.columns(2)

    with left_col:
        learning_rate = st.number_input(
            "Learning Rate",
            min_value=1e-6,
            max_value=1e-3,
            value=2e-5,
            format="%.2e",
        )
        batch_size = st.slider("Batch Size", min_value=1, max_value=32, value=8, step=1)

    with right_col:
        epochs = st.slider("Number of Epochs", min_value=1, max_value=10, value=3, step=1)
        use_simulation = st.checkbox("Use Simulation Mode (for demonstration)", value=True)

    # Launch handler: validate the model ID first, then start the job.
    if st.button("Start Training", disabled=not dataset_name):
        # Work out the single reason (if any) the request must be rejected.
        if not model_id:
            rejection = "Please provide a model ID"
        elif model_id in st.session_state.get('trained_models', {}):
            rejection = f"Model with ID '{model_id}' already exists. Please choose a different ID."
        elif model_id in st.session_state.get('training_progress', {}):
            rejection = f"A training job for model '{model_id}' already exists."
        else:
            rejection = None

        if rejection is not None:
            st.error(rejection)
        else:
            # The stop-event registry is created lazily on first launch.
            if 'stop_events' not in st.session_state:
                st.session_state.stop_events = {}

            # Simulation mode ignores learning rate and batch size; a real run
            # receives the full parameter set.
            if use_simulation:
                stop_handle = simulate_training(
                    model_id, dataset_name, base_model, epochs
                )
                launch_note = f"Started simulated training for model '{model_id}'"
            else:
                stop_handle = start_model_training(
                    model_id, dataset_name, base_model, learning_rate, batch_size, epochs
                )
                launch_note = f"Started training for model '{model_id}'"

            # Keep the returned handle so the monitor tab can stop the job later.
            st.session_state.stop_events[model_id] = stop_handle
            add_log(launch_note)

            st.success(f"Training job started for model '{model_id}'")
            # Short pause so the success message is visible before the rerun.
            time.sleep(1)
            st.rerun()
|
98 |
+
|
99 |
+
with tab2:
    # NOTE(review): indentation was reconstructed from a flattened diff view;
    # in particular, confirm that the plot and log sections belong inside the
    # selected-job branch.
    st.subheader("Training Jobs")

    # Nothing to monitor until at least one job has been registered.
    if 'training_progress' not in st.session_state or not st.session_state.training_progress:
        st.info("No training jobs found. Start a new training job in the 'Configure Training' tab.")
    else:
        # Let the user pick one of the known jobs.
        all_jobs = list(st.session_state.training_progress)
        selected_job = st.selectbox("Select Training Job", all_jobs)

        if selected_job:
            job_progress = st.session_state.training_progress[selected_job]

            # Status headline, coloured by state; unknown states fall back to gray.
            status = job_progress['status']
            status_color = {
                'initialized': 'blue',
                'running': 'green',
                'completed': 'green',
                'failed': 'red',
                'stopped': 'orange'
            }.get(status, 'gray')

            st.markdown(f"### Status: :{status_color}[{status.upper()}]")

            # The stored value is a percentage. st.progress raises for floats
            # outside [0.0, 1.0], so clamp the fraction instead of letting a
            # slightly out-of-range value crash the page.
            progress = job_progress['progress']
            st.progress(min(max(progress / 100, 0.0), 1.0))

            col1, col2 = st.columns(2)

            with col1:
                st.markdown("### Job Details")
                st.markdown(f"**Model ID:** {selected_job}")
                st.markdown(f"**Current Epoch:** {job_progress['current_epoch']}/{job_progress['total_epochs']}")
                st.markdown(f"**Started At:** {job_progress['started_at']}")

                # .get() tolerates records that have no completion timestamp key yet.
                if job_progress.get('completed_at'):
                    st.markdown(f"**Completed At:** {job_progress['completed_at']}")

            with col2:
                st.markdown("### Controls")

                # A stop button is only offered for running jobs whose stop
                # event is still held in session state.
                if status == 'running' and selected_job in st.session_state.get('stop_events', {}):
                    if st.button("Stop Training"):
                        stop_event = st.session_state.stop_events[selected_job]
                        stop_model_training(selected_job, stop_event)
                        st.success(f"Stopping training for model '{selected_job}'")
                        time.sleep(1)
                        st.rerun()

                # Finished jobs (completed, failed or stopped) can be deleted.
                if status in ['completed', 'failed', 'stopped']:
                    if st.button("Delete Job"):
                        del st.session_state.training_progress[selected_job]
                        # Drop the matching stop event too, if one is still stored.
                        st.session_state.get('stop_events', {}).pop(selected_job, None)
                        add_log(f"Deleted training job for model '{selected_job}'")
                        st.success(f"Training job for model '{selected_job}' deleted")
                        time.sleep(1)
                        st.rerun()

            # Progress plot for the selected job.
            st.markdown("### Training Progress")
            plot_training_progress(selected_job)

            # Log output.
            st.markdown("### Training Logs")
            display_logs()
|
173 |
+
|
174 |
+
# Summary of running jobs, shown after a horizontal rule.
# NOTE(review): indentation was reconstructed from a flattened diff view; this
# section is assumed to sit at the top level, below the tabs. Confirm.
st.markdown("---")
st.subheader("Running Jobs Summary")
running_jobs = get_running_training_jobs()

if not running_jobs:
    st.info("No active training jobs")
else:
    # Read the progress table defensively: a job reported as running may have
    # no entry in this session (for example after "Delete Job"), and the table
    # itself may not exist yet. Such jobs are skipped rather than crashing.
    progress_by_job = st.session_state.get('training_progress', {})

    for job in running_jobs:
        progress = progress_by_job.get(job)
        if progress is None:
            continue

        # One row per job: name, epoch counter, progress bar.
        col1, col2, col3 = st.columns([2, 1, 1])

        with col1:
            st.markdown(f"**{job}**")

        with col2:
            st.markdown(f"Epoch {progress['current_epoch']}/{progress['total_epochs']}")

        with col3:
            # Clamp to the range st.progress accepts for float values.
            st.progress(min(max(progress['progress'] / 100, 0.0), 1.0))
|
pages/03_Code_Generation.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import time
|
3 |
+
from model_utils import list_trained_models, generate_code, get_model_info
|
4 |
+
from utils import set_page_config, display_sidebar, add_log, format_code
|
5 |
+
|
6 |
+
# Page scaffolding: call the project helpers (imported from utils) that set
# the page configuration and display the sidebar before any page content.
set_page_config()
display_sidebar()

# Page heading and a one-line description of what this page is for.
st.title("Code Generation")
st.markdown("Generate Python code using your trained models.")

# Trained models the user can generate with; the rest of the page branches on this.
available_models = list_trained_models()
|
18 |
+
|
19 |
+
# NOTE(review): indentation was reconstructed from a flattened diff view;
# confirm the nesting against the real file.
if available_models:
    # Two equal-width columns: setup on the left, generated output on the right.
    setup_col, output_col = st.columns([1, 1])

    with setup_col:
        st.markdown("### Code Generation Setup")

        selected_model = st.selectbox("Select Model", available_models)

        # Show stored metadata for the chosen model, when there is any.
        model_info = get_model_info(selected_model) if selected_model else None
        if model_info:
            st.markdown("#### Model Information")

            with st.expander("Model Details", expanded=False):
                for key, value in model_info.items():
                    # The ID is already visible in the selectbox.
                    if key == 'id':
                        continue
                    st.markdown(f"**{key.replace('_', ' ').title()}:** {value}")

        # Sampling controls.
        st.markdown("#### Generation Parameters")
        max_length = st.slider("Maximum Length", min_value=50, max_value=500, value=200, step=10)
        temperature = st.slider(
            "Temperature",
            min_value=0.1,
            max_value=2.0,
            value=0.7,
            step=0.1,
            help="Higher values make output more random, lower values more deterministic",
        )
        top_p = st.slider(
            "Top P (Nucleus Sampling)",
            min_value=0.1,
            max_value=1.0,
            value=0.9,
            step=0.05,
            help="Controls diversity. 0.9 means consider tokens comprising the top 90% probability mass",
        )

        # Prompt entry; the button stays disabled until something is typed.
        st.markdown("#### Input Prompt")
        prompt = st.text_area(
            "Enter your code prompt",
            height=200,
            placeholder="# Function to calculate fibonacci sequence\ndef fibonacci(n):",
        )

        generate_button = st.button("Generate Code", disabled=not prompt)

    with output_col:
        st.markdown("### Generated Code")

        # Single slot that is filled with new output, the latest history entry,
        # or a hint, depending on the branch taken below.
        code_placeholder = st.empty()

        # The history list lives in session state and is created on first use.
        if 'code_history' not in st.session_state:
            st.session_state.code_history = []

        if generate_button and prompt and selected_model:
            # The same sampling settings go to the generator and into history.
            sampling = {
                'max_length': max_length,
                'temperature': temperature,
                'top_p': top_p,
            }

            with st.spinner("Generating code..."):
                generated_code = generate_code(selected_model, prompt, **sampling)

            # Record the run so it appears in the history list.
            st.session_state.code_history.append({
                'prompt': prompt,
                'code': generated_code,
                'model': selected_model,
                'parameters': sampling,
                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            })

            code_placeholder.code(format_code(generated_code), language='python')

            add_log(f"Generated code with model '{selected_model}' (length: {len(generated_code)})")

        elif st.session_state.code_history:
            # No new request on this run: show the most recent result again.
            last_code = st.session_state.code_history[-1]['code']
            code_placeholder.code(format_code(last_code), language='python')
        else:
            # Nothing generated yet in this session.
            code_placeholder.code("# Generated code will appear here", language='python')
else:
    st.warning("No trained models available. Please train a model in the Model Training section.")
|
109 |
+
|
110 |
+
# Code generation history.
# NOTE(review): indentation was reconstructed from a flattened diff view. The
# section is gated on available_models because its "Use the form above" hint
# only makes sense when the generation form is shown. Confirm the nesting.
if available_models:
    st.markdown("---")
    st.markdown("### Code Generation History")

    # The history list is otherwise only created inside the generation panel;
    # initialise it here as well so this section never reads a missing key.
    if 'code_history' not in st.session_state:
        st.session_state.code_history = []
    history = st.session_state.code_history

    if not history:
        st.info("No code has been generated yet. Use the form above to generate code.")
    else:
        # Newest first; the label keeps each entry's original 1-based position.
        for number, item in zip(range(len(history), 0, -1), reversed(history)):
            with st.expander(f"Generation {number}: {item['timestamp']}"):
                params = item['parameters']
                st.markdown(f"**Model:** {item['model']}")
                st.markdown(f"**Parameters:** Max Length: {params['max_length']}, "
                            f"Temperature: {params['temperature']}, "
                            f"Top P: {params['top_p']}")

                st.markdown("**Prompt:**")
                st.code(format_code(item['prompt']), language='python')

                st.markdown("**Generated Code:**")
                st.code(format_code(item['code']), language='python')

        # Replace the stored list with an empty one, then rerun to redraw the page.
        if st.button("Clear History"):
            st.session_state.code_history = []
            st.success("History cleared!")
            # Short pause so the success message is visible before the rerun.
            time.sleep(1)
            st.rerun()
|