Spaces:

Sephfox
/

E.L.N

Sleeping

App Files Files Community

Sephfox committed on Nov 10, 2024

Commit

d84bf23

verified ·

1 Parent(s): 56864f5

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -15

app.py CHANGED Viewed

@@ -158,23 +158,55 @@ def main():
     # Sidebar Configuration with Additional Options
     with st.sidebar:
-        st.markdown("### Configuration Panel")
-        training_epochs = st.slider("Training Epochs", min_value=1, max_value=5, value=3)
-        batch_size = st.slider("Batch Size", min_value=2, max_value=8, value=4)
-        model_choice = st.selectbox("Model Selection", ("gpt2", "distilgpt2", "gpt2-medium"))
-        data_source = st.selectbox("Data Source", ("demo", "uploaded file"))
-        custom_learning_rate = st.slider("Learning Rate", min_value=1e-6, max_value=5e-4, value=3e-5, step=1e-6)
-        advanced_toggle = st.checkbox("Advanced Training Settings")
-        if advanced_toggle:
-            warmup_steps = st.slider("Warmup Steps", min_value=0, max_value=500, value=100)
-            weight_decay = st.slider("Weight Decay", min_value=0.0, max_value=0.1, step=0.01, value=0.01)
-        else:
-            warmup_steps = 100
-            weight_decay = 0.01
     # Load Dataset
-    train_dataset = load_dataset(data_source, tokenizer)
     # Start Training with Progress Bar
     progress_placeholder = st.empty()

     # Sidebar Configuration with Additional Options
     with st.sidebar:
+        # Every widget below is indented under the `with` so it renders in the sidebar
+        # (and so the `with` statement has a body at all).
+        st.markdown("### Configuration Panel")
+        # Hugging Face API token input; type="password" masks the value in the UI.
+        hf_token = st.text_input("Enter your Hugging Face Token", type="password")
+        if hf_token:
+            api = HfApi()
+            # NOTE(review): set_access_token may be unavailable in newer huggingface_hub
+            # releases — confirm against the version pinned for this Space.
+            api.set_access_token(hf_token)
+            st.success("Hugging Face token added successfully!")
+        # Training parameters
+        training_epochs = st.slider("Training Epochs", min_value=1, max_value=5, value=3)
+        batch_size = st.slider("Batch Size", min_value=2, max_value=8, value=4)
+        model_choice = st.selectbox("Model Selection", ("gpt2", "distilgpt2", "gpt2-medium"))
+        # Dataset source selection
+        data_source = st.selectbox("Data Source", ("demo", "uploaded file"))
+        # Bind uploaded_file on every path: it is read after the sidebar block even when
+        # the "demo" source is selected and no uploader widget is shown.
+        uploaded_file = None
+        if data_source == "uploaded file":
+            uploaded_file = st.file_uploader("Upload a text file", type=["txt", "csv"])
+        custom_learning_rate = st.slider("Learning Rate", min_value=1e-6, max_value=5e-4, value=3e-5, step=1e-6)
+        # Advanced settings toggle: sliders are shown only when ticked, otherwise the
+        # same values the sliders default to are used.
+        advanced_toggle = st.checkbox("Advanced Training Settings")
+        if advanced_toggle:
+            warmup_steps = st.slider("Warmup Steps", min_value=0, max_value=500, value=100)
+            weight_decay = st.slider("Weight Decay", min_value=0.0, max_value=0.1, step=0.01, value=0.01)
+        else:
+            warmup_steps = 100
+            weight_decay = 0.01
     # Load Dataset
+    def load_dataset(data_source="demo", tokenizer=None, uploaded_file=None):
+        """Build the training dataset from the demo text or an uploaded file.
+
+        Args:
+            data_source: "demo" selects the built-in sample sentence; any other
+                value falls back to ``uploaded_file``.
+            tokenizer: Passed through unchanged to ``prepare_dataset``.
+            uploaded_file: File-like object exposing ``.name`` and ``.read()``
+                (presumably a Streamlit upload — confirm against the caller),
+                or None when nothing was uploaded.
+
+        Returns:
+            Whatever ``prepare_dataset(data, tokenizer)`` returns.
+
+        Raises:
+            ValueError: If the uploaded file is neither ``.txt`` nor ``.csv``.
+        """
+        if data_source == "demo":
+            data = ["Sample text data for model training. This can be replaced with actual data for better performance."]
+        elif uploaded_file is not None:
+            # Compare the suffix case-insensitively so "NOTES.TXT" is accepted too.
+            filename = uploaded_file.name.lower()
+            if filename.endswith(".txt"):
+                data = [uploaded_file.read().decode("utf-8")]
+            elif filename.endswith(".csv"):
+                import pandas as pd
+                df = pd.read_csv(uploaded_file)
+                data = df[df.columns[0]].tolist()  # assuming first column is text data
+            else:
+                # Without this branch `data` would be unbound and the call below
+                # would fail with an opaque UnboundLocalError.
+                raise ValueError(
+                    f"Unsupported file type: {uploaded_file.name!r}; expected .txt or .csv"
+                )
+        else:
+            data = ["No file uploaded. Please upload a dataset."]
+        dataset = prepare_dataset(data, tokenizer)
+        return dataset
+
+    # Call only after the nested definition above has bound the local name.
+    train_dataset = load_dataset(data_source, tokenizer, uploaded_file=uploaded_file)
     # Start Training with Progress Bar
     progress_placeholder = st.empty()