isroych
/

prompt-analyzer

Model card Files Files and versions Community

prompt-analyzer / classifier.py

isroych's picture

Upload 4 files

3af5364 over 1 year ago

history blame contribute delete

2.17 kB

	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.metrics import accuracy_score, classification_report
	from pandas import *
	import pickle
	import sys

	# Step 1: Data Preparation
	# Assuming you have 'text' and 'label' columns in your dataset.
	# Load your data into a pandas DataFrame or use any other data loading method.
	# For this example, we'll use a simple list of reviews and labels.

	filename = 'classifier.bin'


	def train():
	data = read_csv("train.csv")
	text = data['text'].tolist()
	label = data['label'].tolist()
	# Step 2: Text Preprocessing and Feature Extraction
	vectorizer = CountVectorizer()
	X = vectorizer.fit_transform(text)
	# Step 4: Split Data
	X_train, X_test, y_train, y_test = train_test_split(X, label, test_size=0.2, random_state=42)

	# Step 5: Create and Train the Classifier
	classifier = MultinomialNB()
	classifier.fit(X_train, y_train)

	# Step 6: Evaluate the Classifier
	y_pred = classifier.predict(X_test)
	accuracy = accuracy_score(y_test, y_pred)
	report = classification_report(y_test, y_pred)

	print(f"Accuracy: {accuracy}")
	print("Classification Report:\n", report)
	# save the model to disk
	pickle.dump(classifier, open(filename, 'wb'))
	pickle.dump(vectorizer, open('vectorizer.pkl', 'wb'))

	def classify(text):
	classifier = pickle.load(open(filename, 'rb'))
	vectorizer = pickle.load(open('vectorizer.pkl', 'rb'))
	# New text you want to classify
	new_text = [text]

	# Preprocess and convert new text into numerical features using the same vectorizer
	new_text_features = vectorizer.transform(new_text)

	# Use the trained classifier to predict the label
	predicted_label = classifier.predict(new_text_features)
	return predicted_label[0]

	#print(f"Predicted Label: {predicted_label[0]}")

	if __name__ == '__main__':
	train()
	classification = input("would you like to classify(Y/n)?\n")
	if classification.upper() == "Y":
	print(classify("I like turtles, do you?"))
	else:
	pass