S-Dreamer committed on
Commit
9ad4afc
·
verified ·
1 Parent(s): b5bb095

Upload 2 files

Browse files
pages/02_Model_Training.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit page: configure new training jobs and monitor existing ones.
#
# NOTE(review): this section was recovered from a diff rendering (line-number
# rows and "+" markers removed, indentation restored) — confirm against the
# repository copy.
import threading
import time

import pandas as pd
import streamlit as st

from data_utils import list_available_datasets, get_dataset_info
from model_utils import list_available_huggingface_models
from training_utils import (
    start_model_training,
    stop_model_training,
    get_running_training_jobs,
    simulate_training,
)
from utils import (
    set_page_config,
    display_sidebar,
    add_log,
    display_logs,
    plot_training_progress,
)

# Set page configuration (project helper from utils)
set_page_config()

# Display sidebar (project helper from utils)
display_sidebar()

# Title
st.title("Model Training")
st.markdown("Configure and train code generation models on your datasets.")

# Two tabs: tab1 holds the training form, tab2 the job monitor
tab1, tab2 = st.tabs(["Configure Training", "Monitor Jobs"])

# "Configure Training" tab: collect the job settings and launch a training job.
with tab1:
    st.subheader("Train a New Model")

    # Model ID input. Surrounding whitespace is dropped so that a value made
    # only of spaces is rejected by the "missing ID" check below.
    model_id = (
        st.text_input("Model ID", placeholder="e.g., my_codegen_model_v1") or ""
    ).strip()

    # Dataset selection
    available_datasets = list_available_datasets()
    if not available_datasets:
        st.warning("No datasets available. Please upload a dataset in the Dataset Management section.")
        dataset_name = None
    else:
        dataset_name = st.selectbox("Select Dataset", available_datasets)

    # Model selection
    model_options = list_available_huggingface_models()
    base_model = st.selectbox("Select Base Model", model_options)

    # Training parameters
    st.markdown("### Training Parameters")
    col1, col2 = st.columns(2)

    with col1:
        learning_rate = st.number_input(
            "Learning Rate",
            min_value=1e-6,
            max_value=1e-3,
            value=2e-5,
            # The default float step (0.01) is larger than the whole allowed
            # range, so the +/- buttons would jump straight to the bounds.
            step=1e-6,
            format="%.2e",
        )
        batch_size = st.slider("Batch Size", min_value=1, max_value=32, value=8, step=1)

    with col2:
        epochs = st.slider("Number of Epochs", min_value=1, max_value=10, value=3, step=1)
        use_simulation = st.checkbox("Use Simulation Mode (for demonstration)", value=True)

    # Start training button; unusable until both a dataset and a base model
    # have been selected.
    if st.button("Start Training", disabled=not (dataset_name and base_model)):
        if not model_id:
            st.error("Please provide a model ID")
        elif model_id in st.session_state.get('trained_models', {}):
            st.error(f"Model with ID '{model_id}' already exists. Please choose a different ID.")
        elif model_id in st.session_state.get('training_progress', {}):
            st.error(f"A training job for model '{model_id}' already exists.")
        else:
            # Initialize stop_events if not present
            if 'stop_events' not in st.session_state:
                st.session_state.stop_events = {}

            # Start training (real or simulated). The value returned by the
            # launcher is kept per model ID; the "Monitor Jobs" tab passes it
            # to stop_model_training().
            if use_simulation:
                st.session_state.stop_events[model_id] = simulate_training(
                    model_id, dataset_name, base_model, epochs
                )
                add_log(f"Started simulated training for model '{model_id}'")
            else:
                st.session_state.stop_events[model_id] = start_model_training(
                    model_id, dataset_name, base_model, learning_rate, batch_size, epochs
                )
                add_log(f"Started training for model '{model_id}'")

            st.success(f"Training job started for model '{model_id}'")
            # Leave the success message visible briefly before re-running.
            time.sleep(1)
            st.rerun()

# "Monitor Jobs" tab: inspect, stop, or delete a single training job.
with tab2:
    st.subheader("Training Jobs")

    # Check if there are any training jobs
    training_progress = st.session_state.get('training_progress')
    if not training_progress:
        st.info("No training jobs found. Start a new training job in the 'Configure Training' tab.")
    else:
        # List all training jobs
        all_jobs = list(training_progress.keys())
        selected_job = st.selectbox("Select Training Job", all_jobs)

        if selected_job:
            # Get job progress
            job_progress = training_progress[selected_job]

            # Display job status
            status = job_progress['status']
            status_color = {
                'initialized': 'blue',
                'running': 'green',
                'completed': 'green',
                'failed': 'red',
                'stopped': 'orange',
            }.get(status, 'gray')

            st.markdown(f"### Status: :{status_color}[{status.upper()}]")

            # Display progress bar. The stored value is divided by 100 and
            # clamped, because st.progress rejects floats outside [0.0, 1.0].
            progress = job_progress['progress']
            st.progress(min(max(progress / 100, 0.0), 1.0))

            # Display job details
            col1, col2 = st.columns(2)

            with col1:
                st.markdown("### Job Details")
                st.markdown(f"**Model ID:** {selected_job}")
                st.markdown(f"**Current Epoch:** {job_progress['current_epoch']}/{job_progress['total_epochs']}")
                st.markdown(f"**Started At:** {job_progress['started_at']}")

                # Only shown once a completion time has been recorded.
                completed_at = job_progress.get('completed_at')
                if completed_at:
                    st.markdown(f"**Completed At:** {completed_at}")

            with col2:
                # Training controls
                st.markdown("### Controls")

                # Only show stop button for running jobs
                if status == 'running' and selected_job in st.session_state.get('stop_events', {}):
                    if st.button("Stop Training"):
                        stop_event = st.session_state.stop_events[selected_job]
                        stop_model_training(selected_job, stop_event)
                        st.success(f"Stopping training for model '{selected_job}'")
                        time.sleep(1)
                        st.rerun()

                # Add delete button for completed/failed/stopped jobs
                if status in ('completed', 'failed', 'stopped'):
                    if st.button("Delete Job"):
                        del st.session_state.training_progress[selected_job]
                        if selected_job in st.session_state.get('stop_events', {}):
                            del st.session_state.stop_events[selected_job]
                        add_log(f"Deleted training job for model '{selected_job}'")
                        st.success(f"Training job for model '{selected_job}' deleted")
                        time.sleep(1)
                        st.rerun()

            # Display training progress plot
            # NOTE(review): nesting of this section and the logs below was
            # reconstructed from a flattened listing — confirm placement.
            st.markdown("### Training Progress")
            plot_training_progress(selected_job)

            # Display logs
            st.markdown("### Training Logs")
            display_logs()

# Display running jobs summary at the bottom
# NOTE(review): placed at page level (outside the tabs); the original nesting
# was lost in the flattened listing — confirm placement.
st.markdown("---")
st.subheader("Running Jobs Summary")
running_jobs = get_running_training_jobs()

if not running_jobs:
    st.info("No active training jobs")
else:
    progress_by_job = st.session_state.get('training_progress') or {}
    for job in running_jobs:
        job_progress = progress_by_job.get(job)
        if job_progress is None:
            # A job reported as running may have no progress entry; skip it
            # rather than fail the whole page with a KeyError.
            continue

        col1, col2, col3 = st.columns([2, 1, 1])

        with col1:
            st.markdown(f"**{job}**")

        with col2:
            st.markdown(f"Epoch {job_progress['current_epoch']}/{job_progress['total_epochs']}")

        with col3:
            # Clamp: st.progress rejects floats outside [0.0, 1.0].
            st.progress(min(max(job_progress['progress'] / 100, 0.0), 1.0))
pages/03_Code_Generation.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit page: generate code with a trained model and browse past outputs.
#
# NOTE(review): this section was recovered from a diff rendering (line-number
# rows and "+" markers removed, indentation restored) — confirm against the
# repository copy.
import time

import streamlit as st

from model_utils import list_trained_models, generate_code, get_model_info
from utils import set_page_config, display_sidebar, add_log, format_code

# Set page configuration (project helper from utils)
set_page_config()

# Display sidebar (project helper from utils)
display_sidebar()

# Title
st.title("Code Generation")
st.markdown("Generate Python code using your trained models.")

# Get available models
available_models = list_trained_models()

# Initialize session state for code history if not exists. Done up front so
# every later read of code_history (latest result, history list) is safe.
if 'code_history' not in st.session_state:
    st.session_state.code_history = []

if not available_models:
    st.warning("No trained models available. Please train a model in the Model Training section.")
else:
    # Create main columns for layout: inputs on the left, output on the right
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### Code Generation Setup")

        # Model selection
        selected_model = st.selectbox("Select Model", available_models)

        # Display model info if available
        if selected_model:
            model_info = get_model_info(selected_model)
            if model_info:
                st.markdown("#### Model Information")

                # Create expandable section for model details
                with st.expander("Model Details", expanded=False):
                    for key, value in model_info.items():
                        if key != 'id':  # Skip ID as it's already shown in the selectbox
                            st.markdown(f"**{key.replace('_', ' ').title()}:** {value}")

        # Generation parameters
        st.markdown("#### Generation Parameters")
        max_length = st.slider("Maximum Length", min_value=50, max_value=500, value=200, step=10)
        temperature = st.slider(
            "Temperature", min_value=0.1, max_value=2.0, value=0.7, step=0.1,
            help="Higher values make output more random, lower values more deterministic",
        )
        top_p = st.slider(
            "Top P (Nucleus Sampling)", min_value=0.1, max_value=1.0, value=0.9, step=0.05,
            help="Controls diversity. 0.9 means consider tokens comprising the top 90% probability mass",
        )

        # Input prompt
        st.markdown("#### Input Prompt")
        prompt = st.text_area(
            "Enter your code prompt",
            height=200,
            placeholder="# Function to calculate fibonacci sequence\ndef fibonacci(n):",
        )
        # A prompt consisting only of whitespace counts as empty.
        has_prompt = bool(prompt and prompt.strip())

        # Generate button
        generate_button = st.button("Generate Code", disabled=not has_prompt)

    with col2:
        st.markdown("### Generated Code")

        # Create a placeholder for generated code
        code_placeholder = st.empty()

        # Generate code when button is clicked
        if generate_button and has_prompt and selected_model:
            with st.spinner("Generating code..."):
                generated_code = generate_code(
                    selected_model,
                    prompt,
                    max_length=max_length,
                    temperature=temperature,
                    top_p=top_p,
                )

            # Add to history, together with the settings that produced it
            st.session_state.code_history.append({
                'prompt': prompt,
                'code': generated_code,
                'model': selected_model,
                'parameters': {
                    'max_length': max_length,
                    'temperature': temperature,
                    'top_p': top_p,
                },
                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            })

            # Display the generated code
            code_placeholder.code(format_code(generated_code), language='python')

            # Log the generation
            add_log(f"Generated code with model '{selected_model}' (length: {len(generated_code)})")

        # If there's code history but the generate button wasn't pressed, show the most recent one
        elif st.session_state.code_history:
            last_code = st.session_state.code_history[-1]['code']
            code_placeholder.code(format_code(last_code), language='python')
        else:
            # Show empty placeholder when no code has been generated
            code_placeholder.code("# Generated code will appear here", language='python')

    # Code history section
    # NOTE(review): kept inside the "models available" branch; the original
    # nesting was lost in the flattened listing — confirm placement.
    st.markdown("---")
    st.markdown("### Code Generation History")

    history = st.session_state.code_history
    if not history:
        st.info("No code has been generated yet. Use the form above to generate code.")
    else:
        # Display code history, newest first; entries are numbered by the
        # order in which they were generated (1 = oldest).
        total = len(history)
        for offset, item in enumerate(reversed(history)):
            with st.expander(f"Generation {total - offset}: {item['timestamp']}"):
                params = item['parameters']
                st.markdown(f"**Model:** {item['model']}")
                st.markdown(
                    f"**Parameters:** Max Length: {params['max_length']}, "
                    f"Temperature: {params['temperature']}, "
                    f"Top P: {params['top_p']}"
                )

                st.markdown("**Prompt:**")
                st.code(format_code(item['prompt']), language='python')

                st.markdown("**Generated Code:**")
                st.code(format_code(item['code']), language='python')

        # Clear history button
        if st.button("Clear History"):
            st.session_state.code_history = []
            st.success("History cleared!")
            # Leave the confirmation visible briefly before re-running.
            time.sleep(1)
            st.rerun()