Spaces:

sepp81
/

knjdkjafk

Sleeping

App Files Files Community

sepp81 committed on Dec 15, 2024

Commit

ddf19db

verified ·

1 Parent(s): b1fc271

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -88

app.py CHANGED Viewed

@@ -1,90 +1,75 @@
 import streamlit as st
 import pandas as pd
-import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import accuracy_score
-import joblib
-# Title and Description of the App
-st.title("Human vs LLM-Generated Text Differentiator")
-st.write("This app predicts whether a given text is human-written or generated by a language model (LLM).")
-# Step 1: Upload Dataset
-st.header("Step 1: Upload the RoFT Dataset")
-uploaded_file = st.file_uploader("Upload your roft.csv file", type="csv")
-if uploaded_file is not None:
-    # Load dataset
-    data = pd.read_csv(uploaded_file)
-    st.write("Dataset Loaded Successfully!")
-    # Display the first few rows of the dataset
-    st.subheader("Sample of the Dataset:")
-    st.dataframe(data.head())
-    # Preprocessing the data
-    st.header("Step 2: Preprocess the Data")
-    # Combine prompt_body and gen_body to form the complete text
-    data['text'] = data['prompt_body'].fillna('') + ' ' + data['gen_body'].fillna('')
-    data['label'] = data['true_boundary_index'].apply(lambda x: 1 if x == 9 else 0)  # 1 = Human, 0 = LLM
-    st.write("Data Preprocessing Complete!")
-    # Show distribution of labels
-    st.subheader("Label Distribution:")
-    st.bar_chart(data['label'].value_counts())
-    # Feature Extraction
-    st.header("Step 3: Train the Model")
-    st.write("Extracting features using TF-IDF and training a Random Forest classifier.")
-    # TF-IDF Vectorization
-    vectorizer = TfidfVectorizer(max_features=5000)
-    X = vectorizer.fit_transform(data['text']).toarray()
-    y = data['label']
-    # Train-Test Split
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    # Train a Random Forest Classifier
-    model = RandomForestClassifier(n_estimators=100, random_state=42)
-    model.fit(X_train, y_train)
-    # Evaluate the model
-    y_pred = model.predict(X_test)
-    accuracy = accuracy_score(y_test, y_pred)
-    st.write(f"Model Accuracy: {accuracy * 100:.2f}%")
-    # Save the model and vectorizer
-    joblib.dump(model, 'text_classifier.pkl')
-    joblib.dump(vectorizer, 'vectorizer.pkl')
-    st.success("Model Trained and Saved Successfully!")
-    # Step 4: User Input for Prediction
-    st.header("Step 4: Predict Human vs LLM-Generated Text")
-    # Load the trained model and vectorizer
-    model = joblib.load('text_classifier.pkl')
-    vectorizer = joblib.load('vectorizer.pkl')
-    # Input text from the user
-    user_input = st.text_area("Enter the text you want to classify:")
-    if st.button("Predict"):
-        if user_input.strip():
-            # Vectorize the input text
-            input_vector = vectorizer.transform([user_input]).toarray()
-            # Predict and show the result
-            prediction = model.predict(input_vector)
-            confidence = model.predict_proba(input_vector).max() * 100
-            if prediction[0] == 1:
-                st.success(f"The text is likely **Human-Written** with a confidence of {confidence:.2f}%.")
-            else:
-                st.warning(f"The text is likely **LLM-Generated** with a confidence of {confidence:.2f}%.")
-        else:
-            st.error("Please enter some text for prediction.")

 import streamlit as st
 import pandas as pd
+import random
+# Load the dataset (replace with your actual dataset path)
+df = pd.read_csv('roft.csv')  # Ensure your dataset path is correct
+# Initialize session state variables
+if 'score' not in st.session_state:
+    st.session_state.score = 0
+if 'index' not in st.session_state:
+    st.session_state.index = 0
+if 'game_over' not in st.session_state:
+    st.session_state.game_over = False
+# Function to get next text from the dataset
+def get_next_text():
+    # If game is over, return None to stop
+    if st.session_state.game_over:
+        return None
+    # Fetch the next text in sequence
+    text_data = df.iloc[st.session_state.index]
+    # Combine the prompt body and generation body to form the text to classify
+    prompt_text = text_data['prompt_body']
+    gen_text = text_data['gen_body']
+    # Combine the text and make sure it doesn't exceed 10 sentences
+    full_text = prompt_text + " _SEP_ " + gen_text
+    return full_text, text_data['model'], text_data['true_boundary_index']
+# Function to update the game state
+def update_game_state(user_answer, correct_answer):
+    # Check if answer is correct
+    if user_answer == correct_answer:
+        st.session_state.score += 5
+        st.session_state.index += 1
+        if st.session_state.index >= len(df):
+            st.session_state.game_over = True
+            st.success("Congratulations! You've completed the quiz.")
+    else:
+        st.session_state.game_over = True
+        st.error("Game Over! Incorrect Answer.")
+# Display the score
+st.sidebar.text(f"Score: {st.session_state.score}")
+# Show the current text to classify
+text, model_used, true_boundary_index = get_next_text()
+if text:
+    # Display the text
+    st.write(f"Model used: {model_used}")
+    st.write(f"Text: {text}")
+    # User input (radio buttons for classification)
+    user_answer = st.radio("Classify the text as:", ["human", "machine"])
+    # When user submits answer
+    if st.button("Submit"):
+        # Correct answer is determined based on boundary index
+        correct_answer = "human" if true_boundary_index == 0 else "machine"
+        update_game_state(user_answer, correct_answer)
+else:
+    # Game over message
+    st.write("Game Over!")
+    st.write(f"Your final score is: {st.session_state.score}")
+# Option to restart the game
+if st.session_state.game_over:
+    if st.button("Restart Game"):
+        st.session_state.score = 0
+        st.session_state.index = 0
+        st.session_state.game_over = False
+        st.experimental_rerun()