CIS5190FinalProj
/

RandomForest

Model card Files Files and versions Community

Dada80 committed on Dec 16, 2024

Commit

40c3180

·

verified ·

1 Parent(s): d7f9990

Update README.md

Files changed (1) hide show

README.md +45 -0

README.md CHANGED Viewed

@@ -32,3 +32,48 @@ This model classifies news headlines as either NBC or Fox News.
 <!-- These are the evaluation metrics being used, ideally with a description of why. -->
 - Accuracy Score

 <!-- These are the evaluation metrics being used, ideally with a description of why. -->
 - Accuracy Score
+### Model Description
+```python
+# Example: evaluate the published classifier on a held-out CSV of headlines.
+# Written for Google Colab (uses google.colab and a `!` shell command).
+import joblib
+import pandas as pd
+from google.colab import drive
+from huggingface_hub import hf_hub_download
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics import classification_report
+
+# Mount Google Drive so the test CSV is reachable from the Colab runtime.
+drive.mount('/content/drive')
+
+# Load the test set; the script reads its 'title' and 'labels' columns.
+test_df = pd.read_csv("/content/drive/MyDrive/test_data_random_subset.csv")
+
+# Log in with YOUR OWN Hugging Face access token when prompted.
+# Never paste a token into this README: it is public. Any token that was
+# previously published here must be treated as compromised and revoked.
+!huggingface-cli login
+
+# NOTE(review): this README belongs to the RandomForest repo, but the
+# artifacts below are fetched from the GBTrees repo -- confirm this is intended.
+# hf_hub_download returns the local path of the downloaded file.
+model_path = hf_hub_download(repo_id="CIS5190FinalProj/GBTrees", filename="gb_trees_model.pkl")
+vectorizer_path = hf_hub_download(repo_id="CIS5190FinalProj/GBTrees", filename="tfidf_vectorizer.pkl")
+
+# SECURITY: joblib.load unpickles arbitrary Python objects and can execute
+# code while doing so. Only load artifacts from a repository you trust.
+pipeline = joblib.load(model_path)
+tfidf_vectorizer = joblib.load(vectorizer_path)
+
+# Turn the raw headlines into the numerical features the model expects,
+# using the already-fitted vectorizer (transform only, no re-fitting).
+X_test = test_df['title']
+X_test_transformed = tfidf_vectorizer.transform(X_test)
+
+# Predict, then compare against the ground-truth labels.
+y_pred = pipeline.predict(X_test_transformed)
+y_test = test_df['labels']
+print(classification_report(y_test, y_pred))
+```