jaynopponep committed on
Commit
2e23211
·
1 Parent(s): 154a557

Adding new scikit based code!

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. model.py +11 -19
app.py CHANGED
@@ -1,10 +1,10 @@
1
  from flask import Flask, render_template, request, jsonify
2
- import model # Import your model module
3
 
4
  app = Flask(__name__)
5
 
6
  # Load data and train the model globally
7
- df = model.load_data()
8
  X_train, X_test, y_train, y_test = model.split_data(df)
9
  pipeline = model.create_pipeline(X_train, y_train)
10
 
@@ -18,4 +18,4 @@ def home():
18
  return render_template('home.html')
19
 
20
  if __name__ == '__main__':
21
- app.run(debug=True)
 
import os

from flask import Flask, render_template, request, jsonify

import model
3
 
4
  app = Flask(__name__)
5
 
# Load the data and train the model once, at import time, so the fitted
# pipeline is shared by every request handled by this process.
# The dataset location defaults to the path previously hard-coded here and can
# be overridden with the AI_HUMAN_CSV environment variable, so deployments do
# not need to edit the source to point at the real file.
df = model.load_data(os.environ.get('AI_HUMAN_CSV', 'path_to_AI_Human.csv'))
X_train, X_test, y_train, y_test = model.split_data(df)
pipeline = model.create_pipeline(X_train, y_train)
10
 
 
18
  return render_template('home.html')
19
 
if __name__ == '__main__':
    # Debug mode turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it off unless the
    # developer opts in explicitly (e.g. FLASK_DEBUG=1 python app.py).
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)
model.py CHANGED
@@ -1,46 +1,38 @@
1
  import pandas as pd
2
  from sklearn.model_selection import train_test_split
3
- from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
4
  from sklearn.pipeline import Pipeline
 
5
  from sklearn.naive_bayes import MultinomialNB
 
6
 
7
- # Function to remove unwanted tags from the text
8
  def remove_tags(text):
9
  tags = ['\n', '\'']
10
  for tag in tags:
11
  text = text.replace(tag, '')
12
  return text
13
 
14
- # Assuming the data is loaded into a DataFrame 'df' at some point
15
- def load_data():
16
- # Dummy loading mechanism, replace with actual data loading
17
- df = pd.read_csv("AI_Human.csv")
18
  df['text'] = df['text'].apply(remove_tags)
19
  return df
20
 
21
  def split_data(df):
22
  y = df['generated']
23
  X = df['text']
24
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
25
  return X_train, X_test, y_train, y_test
26
 
27
- # Build and train the pipeline
28
  def create_pipeline(X_train, y_train):
29
  pipeline = Pipeline([
30
- ('count_vectorizer', CountVectorizer()), # Step 1: Convert text to count vectors
31
- ('tfidf_transformer', TfidfTransformer()), # Step 2: Transform count vectors to TF-IDF
32
- ('classifier', MultinomialNB()) # Step 3: Train a classifier, here using Naive Bayes
33
  ])
34
  pipeline.fit(X_train, y_train)
35
  return pipeline
36
 
37
- # Function to predict new inputs using the trained pipeline
38
  def predict_text(text, pipeline):
39
- return pipeline.predict([text])[0] # Return the classification result
 
 
40
 
41
- # Main routine to train the model if this file is executed directly (for testing)
42
- #if __name__ == "__main__":
43
- # df = load_data()
44
- # X_train, X_test, y_train, y_test = split_data(df)
45
- # pipeline = create_pipeline(X_train, y_train)
46
- # print(f"Model trained. Test accuracy: {pipeline.score(X_test, y_test)}")
 
1
  import pandas as pd
2
  from sklearn.model_selection import train_test_split
 
3
  from sklearn.pipeline import Pipeline
4
+ from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
5
  from sklearn.naive_bayes import MultinomialNB
6
+ from sklearn.metrics import accuracy_score, classification_report
7
 
 
# Deletion table for remove_tags: newline and apostrophe characters.
# Built once at import time instead of on every call.
_TAG_TABLE = str.maketrans('', '', "\n'")


def remove_tags(text):
    """Return *text* with every newline and apostrophe character removed.

    Despite the name, no markup is parsed: only the two literal characters
    ``'\\n'`` and ``"'"`` are deleted; everything else is left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without newlines or apostrophes.
    """
    # A single translate pass replaces the former chain of str.replace calls.
    return text.translate(_TAG_TABLE)
13
 
def load_data(filepath):
    """Read the labelled corpus from a CSV file and clean its ``text`` column.

    Args:
        filepath: Location of the CSV file, passed straight to
            ``pandas.read_csv``. The file must contain a ``text`` column.

    Returns:
        The DataFrame read from *filepath*, with rows lacking a ``text`` value
        dropped (index renumbered) and ``remove_tags`` applied to every
        remaining ``text`` value.

    Raises:
        KeyError: If the file has no ``text`` column.
    """
    df = pd.read_csv(filepath)
    # An empty cell is read as NaN (a float), which remove_tags cannot handle.
    # Such rows carry no text to learn from, so drop them before cleaning.
    df = df.dropna(subset=['text']).reset_index(drop=True)
    df['text'] = df['text'].astype(str).apply(remove_tags)
    return df
18
 
def split_data(df, test_size=0.2, random_state=42):
    """Split the corpus into training and test portions.

    Args:
        df: DataFrame with a ``text`` column (the features) and a
            ``generated`` column (the labels).
        test_size: Share of the rows held out for testing, forwarded to
            ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 42.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    features = df['text']
    labels = df['generated']
    return train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
    )
24
 
 
def create_pipeline(X_train, y_train):
    """Build the text-classification pipeline and fit it on the training data.

    The pipeline chains three steps: token counting, TF-IDF re-weighting of
    those counts, and a multinomial Naive Bayes classifier.

    Args:
        X_train: Training texts.
        y_train: Labels matching *X_train*.

    Returns:
        The fitted ``Pipeline``.
    """
    steps = [
        ('count_vectorizer', CountVectorizer()),
        ('tfidf_transformer', TfidfTransformer()),
        ('classifier', MultinomialNB()),
    ]
    text_classifier = Pipeline(steps)
    text_classifier.fit(X_train, y_train)
    return text_classifier
33
 
 
def predict_text(text, pipeline):
    """Classify a single piece of text with a fitted pipeline.

    The text is cleaned with ``remove_tags`` before prediction, the same
    cleaning ``load_data`` applies to the training corpus.

    Args:
        text: The raw text to classify.
        pipeline: A fitted object exposing ``predict`` on a list of strings.

    Returns:
        ``"AI-generated"`` when the predicted label is truthy, otherwise
        ``"Human-written"``.
    """
    cleaned = remove_tags(text)
    label = pipeline.predict([cleaned])[0]
    # NOTE(review): relies on label truthiness, so it presumably expects
    # numeric 0/1 labels in the training data - confirm against the dataset.
    if label:
        return "AI-generated"
    return "Human-written"
38