Dada80 committed on
Commit
40c3180
·
verified ·
1 Parent(s): d7f9990

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +45 -0
README.md CHANGED
@@ -32,3 +32,48 @@ This model classifies news headlines as either NBC or Fox News.
32
  <!-- These are the evaluation metrics being used, ideally with a description of why. -->
33
 
34
  - Accuracy Score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  <!-- These are the evaluation metrics being used, ideally with a description of why. -->
33
 
34
  - Accuracy Score
35
+
36
+ ### Model Description
37
+ import pandas as pd
38
+ import joblib
39
+ from huggingface_hub import hf_hub_download
40
+ from sklearn.feature_extraction.text import TfidfVectorizer
41
+ from sklearn.metrics import classification_report
42
+
43
+ # Mount to drive
44
+ from google.colab import drive
45
+ drive.mount('/content/drive')
46
+
47
+ # Load test set
48
+ test_df = pd.read_csv("/content/drive/MyDrive/test_data_random_subset.csv")
49
+
50
+ # Log in with your Hugging Face access token
51
+ # token: <YOUR_HF_TOKEN> (enter it at the login prompt; never commit a real token)
52
+ !huggingface-cli login
53
+
54
+ # Download the model
55
+ model = hf_hub_download(repo_id = "CIS5190FinalProj/GBTrees", filename = "gb_trees_model.pkl")
56
+
57
+ # Download the vectorizer
58
+ tfidf_vectorizer = hf_hub_download(repo_id = "CIS5190FinalProj/GBTrees", filename = "tfidf_vectorizer.pkl")
59
+
60
+ # Load the model
61
+ pipeline = joblib.load(model)
62
+
63
+ # Load the vectorizer
64
+ tfidf_vectorizer = joblib.load(tfidf_vectorizer)
65
+
66
+ # Extract the headlines from the test set
67
+ X_test = test_df['title']
68
+
69
+ # Transform the headlines into numerical TF-IDF features
70
+ X_test_transformed = tfidf_vectorizer.transform(X_test)
71
+
72
+ # Make predictions using the pipeline
73
+ y_pred = pipeline.predict(X_test_transformed)
74
+
75
+ # Extract 'labels' as target
76
+ y_test = test_df['labels']
77
+
78
+ # Print classification report
79
+ print(classification_report(y_test, y_pred))