Commit
·
2e23211
1
Parent(s):
154a557
Adding new scikit based code!
Browse files
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
from flask import Flask, render_template, request, jsonify
|
2 |
-
import model
|
3 |
|
4 |
app = Flask(__name__)
|
5 |
|
6 |
# Load data and train the model globally
|
7 |
-
df = model.load_data()
|
8 |
X_train, X_test, y_train, y_test = model.split_data(df)
|
9 |
pipeline = model.create_pipeline(X_train, y_train)
|
10 |
|
@@ -18,4 +18,4 @@ def home():
|
|
18 |
return render_template('home.html')
|
19 |
|
20 |
if __name__ == '__main__':
|
21 |
-
app.run(debug=True)
|
|
|
1 |
from flask import Flask, render_template, request, jsonify
|
2 |
+
import model
|
3 |
|
4 |
app = Flask(__name__)
|
5 |
|
6 |
# Load data and train the model globally
|
7 |
+
df = model.load_data('path_to_AI_Human.csv') # Make sure this path is correct
|
8 |
X_train, X_test, y_train, y_test = model.split_data(df)
|
9 |
pipeline = model.create_pipeline(X_train, y_train)
|
10 |
|
|
|
18 |
return render_template('home.html')
|
19 |
|
20 |
if __name__ == '__main__':
|
21 |
+
app.run(debug=True)
|
model.py
CHANGED
@@ -1,46 +1,38 @@
|
|
1 |
import pandas as pd
|
2 |
from sklearn.model_selection import train_test_split
|
3 |
-
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
4 |
from sklearn.pipeline import Pipeline
|
|
|
5 |
from sklearn.naive_bayes import MultinomialNB
|
|
|
6 |
|
7 |
-
# Function to remove unwanted tags from the text
|
8 |
def remove_tags(text):
|
9 |
tags = ['\n', '\'']
|
10 |
for tag in tags:
|
11 |
text = text.replace(tag, '')
|
12 |
return text
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
# Dummy loading mechanism, replace with actual data loading
|
17 |
-
df = pd.read_csv("AI_Human.csv")
|
18 |
df['text'] = df['text'].apply(remove_tags)
|
19 |
return df
|
20 |
|
21 |
def split_data(df):
|
22 |
y = df['generated']
|
23 |
X = df['text']
|
24 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.
|
25 |
return X_train, X_test, y_train, y_test
|
26 |
|
27 |
-
# Build and train the pipeline
|
28 |
def create_pipeline(X_train, y_train):
|
29 |
pipeline = Pipeline([
|
30 |
-
('count_vectorizer', CountVectorizer()),
|
31 |
-
('tfidf_transformer', TfidfTransformer()),
|
32 |
-
('classifier', MultinomialNB())
|
33 |
])
|
34 |
pipeline.fit(X_train, y_train)
|
35 |
return pipeline
|
36 |
|
37 |
-
# Function to predict new inputs using the trained pipeline
|
38 |
def predict_text(text, pipeline):
|
39 |
-
|
|
|
|
|
40 |
|
41 |
-
# Main routine to train the model if this file is executed directly (for testing)
|
42 |
-
#if __name__ == "__main__":
|
43 |
-
# df = load_data()
|
44 |
-
# X_train, X_test, y_train, y_test = split_data(df)
|
45 |
-
# pipeline = create_pipeline(X_train, y_train)
|
46 |
-
# print(f"Model trained. Test accuracy: {pipeline.score(X_test, y_test)}")
|
|
|
1 |
import pandas as pd
|
2 |
from sklearn.model_selection import train_test_split
|
|
|
3 |
from sklearn.pipeline import Pipeline
|
4 |
+
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
5 |
from sklearn.naive_bayes import MultinomialNB
|
6 |
+
from sklearn.metrics import accuracy_score, classification_report
|
7 |
|
|
|
8 |
def remove_tags(text):
    """Return *text* with newline and apostrophe characters removed.

    Args:
        text: The raw text to clean. Normally a ``str``; ``None`` and float
            NaN (what pandas yields for an empty CSV cell) are treated as
            missing, and any other non-string value is converted with
            ``str()`` before cleaning.

    Returns:
        The cleaned string, or ``''`` when *text* is missing.
    """
    if not isinstance(text, str):
        # NaN is the only value that is not equal to itself.
        if text is None or text != text:
            return ''
        text = str(text)
    # One pass that deletes both characters, instead of chained replace().
    return text.translate(str.maketrans('', '', "\n'"))
|
13 |
|
14 |
+
def load_data(filepath):
    """Read the CSV at *filepath* and clean its ``text`` column.

    Every value in the ``text`` column is passed through ``remove_tags``.

    Args:
        filepath: Location of the CSV file, forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with its ``text`` column replaced by the
        cleaned values.
    """
    frame = pd.read_csv(filepath)
    cleaned_text = frame['text'].apply(remove_tags)
    frame['text'] = cleaned_text
    return frame
|
18 |
|
19 |
def split_data(df):
    """Split the dataset into training and test portions.

    The ``generated`` column is used as the label and the ``text`` column
    as the feature. 20% of the rows are held out for testing, and the
    shuffle is seeded (``random_state=42``) so the split is reproducible.

    Args:
        df: DataFrame containing ``text`` and ``generated`` columns.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    labels = df['generated']
    features = df['text']
    parts = train_test_split(
        features,
        labels,
        test_size=0.2,
        random_state=42,
    )
    # Hand back a tuple in the documented order.
    return tuple(parts)
|
24 |
|
|
|
25 |
def create_pipeline(X_train, y_train):
    """Build the text-classification pipeline and fit it.

    The pipeline chains a ``CountVectorizer``, a ``TfidfTransformer`` and a
    ``MultinomialNB`` classifier, in that order.

    Args:
        X_train: Training texts.
        y_train: Labels matching *X_train*.

    Returns:
        The fitted ``Pipeline``.
    """
    steps = [
        ('count_vectorizer', CountVectorizer()),
        ('tfidf_transformer', TfidfTransformer()),
        ('classifier', MultinomialNB()),
    ]
    # Pipeline.fit returns the pipeline itself, so it can be returned directly.
    return Pipeline(steps).fit(X_train, y_train)
|
33 |
|
|
|
34 |
def predict_text(text, pipeline):
    """Classify a single piece of text with a trained pipeline.

    The text is cleaned with ``remove_tags`` and then passed to
    ``pipeline.predict`` as a one-element list.

    Args:
        text: The text to classify.
        pipeline: An object exposing ``predict``, such as the pipeline
            returned by ``create_pipeline``.

    Returns:
        ``"AI-generated"`` when the predicted label is truthy, otherwise
        ``"Human-written"``.
    """
    cleaned = remove_tags(text)
    label = pipeline.predict([cleaned])[0]
    if label:
        return "AI-generated"
    return "Human-written"
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|