- **Base Model**: [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)
- **Deployment Environment**: Configured for GPU (CUDA) support.
- **Training Data**: https://huggingface.co/datasets/vector-institute/newsmediabias-plus
- **Sampled data for inference**: https://huggingface.co/vector-institute/Llama3.2-Multimodal-Newsmedia-Bias-Detector/blob/main/sampled-data/sample_dataset.csv
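
The usage script below reads `sample_dataset.csv` from the working directory. If the file is not already local, one way to fetch it is with the `huggingface_hub` client; this is a minimal sketch (not part of the original card) that assumes the file path shown in the link above.

```python
# Optional: download the sample CSV referenced above (path assumed from the link).
from huggingface_hub import hf_hub_download

sample_csv_path = hf_hub_download(
    repo_id="vector-institute/Llama3.2-Multimodal-Newsmedia-Bias-Detector",
    filename="sampled-data/sample_dataset.csv",
)
print(sample_csv_path)  # pass this path to pd.read_csv in the script below
```
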
## Model Usage

The example below loads the merged model and tokenizer from the Hub, runs inference on a small balanced sample of the dataset above, and writes the predictions to `nlp-results.csv`.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

LLAMA_MODEL_HF_ID = "vector-institute/Llama3.2-NLP-Newsmedia-Bias-Detector"

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_HF_ID)
tokenizer.pad_token = tokenizer.eos_token

# Load the merged model in half precision
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_HF_ID,
    torch_dtype=torch.float16,  # float16 for GPU inference; use float32 on CPU
    device_map="auto"
)
model.eval()

# Inference helper: generate a classification response for a single prompt
def generate_response(model, prompt):
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024
    ).to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=50,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )
    generated_text = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    return generated_text.strip()

# Load your test dataset
print("Loading test dataset...")
df = pd.read_csv('sample_dataset.csv')  # https://huggingface.co/vector-institute/Llama3.2-Multimodal-Newsmedia-Bias-Detector/blob/main/sampled-data/sample_dataset.csv

# Ensure the 'final_label' is in ['Likely', 'Unlikely']
df = df[df['final_label'].isin(['Likely', 'Unlikely'])]

# Balance the dataset
likely_samples = df[df['final_label'] == 'Likely']
unlikely_samples = df[df['final_label'] == 'Unlikely']

num_samples_per_category = min(10, len(likely_samples), len(unlikely_samples))

likely_selected = likely_samples.sample(n=num_samples_per_category, random_state=42)
unlikely_selected = unlikely_samples.sample(n=num_samples_per_category, random_state=42)

balanced_samples = pd.concat([likely_selected, unlikely_selected]).reset_index(drop=True)

# Prepare test samples directly
def format_data(sample):
    prompt = (
        "Assess the text below for potential disinformation by identifying the presence of rhetorical techniques listed.\n"
        "If you find some of the listed rhetorical techniques below, then the article is likely disinformation; if not, it is likely not disinformation.\n\n"
        "Rhetorical Techniques Checklist:\n"
        "- Emotional Appeal: Uses language or imagery that intentionally invokes extreme emotions like fear or anger, aiming to distract from lack of factual backing.\n"
        "- Exaggeration and Hyperbole: Makes claims that are unsupported by evidence, or presents normal situations as extraordinary to manipulate perceptions.\n"
        "- Bias and Subjectivity: Presents information in a way that unreasonably favors one perspective, omitting key facts that might provide balance.\n"
        "- Repetition: Uses repeated messaging of specific points or misleading statements to embed a biased viewpoint in the reader's mind.\n"
        "- Specific Word Choices: Employs emotionally charged or misleading terms to sway opinions subtly, often in a manipulative manner.\n"
        "- Appeals to Authority: References authorities who lack relevant expertise or cites sources that do not have the credentials to be considered authoritative in the context.\n"
        "- Lack of Verifiable Sources: Relies on sources that either cannot be verified or do not exist, suggesting a fabrication of information.\n"
        "- Logical Fallacies: Engages in flawed reasoning such as circular reasoning, strawman arguments, or ad hominem attacks that undermine logical debate.\n"
        "- Conspiracy Theories: Propagates theories that lack proof and often contain elements of paranoia or implausible scenarios as facts.\n"
        "- Inconsistencies and Factual Errors: Contains multiple contradictions or factual inaccuracies that are easily disprovable, indicating a lack of concern for truth.\n"
        "- Selective Omission: Deliberately leaves out crucial information that is essential for a fair understanding of the topic, skewing perception.\n"
        "- Manipulative Framing: Frames issues in a way that leaves out alternative perspectives or possible explanations, focusing only on aspects that support a biased narrative.\n\n"
        f"{sample['first_paragraph']}\n\n"
        "Respond ONLY with the classification 'Likely (1)' or 'Unlikely (0)' without any additional explanation."
    )
    response = f"This text should be classified as: {'Likely (1)' if sample['final_label'] == 'Likely' else 'Unlikely (0)'}"
    return {"prompt": prompt, "response": response, "text": sample['first_paragraph'], "actual_label": sample['final_label']}

test_samples = [format_data(sample) for _, sample in balanced_samples.iterrows()]

# Generate predictions and collect results
print("Generating predictions...")
results = []

for idx, sample in enumerate(tqdm(test_samples, desc="Processing samples")):
    prompt = sample["prompt"]
    true_label = 1 if "Likely (1)" in sample["response"] else 0

    # Generate response using the merged model
    merged_response = generate_response(model, prompt)
    merged_predicted_label = 1 if "Likely (1)" in merged_response else 0

    # Save results
    results.append({
        "text": sample["text"],
        "actual_label": true_label,
        "merged_response": merged_response,
        "merged_predicted_label": merged_predicted_label
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)
results_df.to_csv('nlp-results.csv')

# Display metrics
labels = ['Unlikely (0)', 'Likely (1)']
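# Note: precision_recall_fscore_support and accuracy_score are imported above but the
# original snippet stops short of using them; the lines below are a minimal sketch
# (added here, not part of the original card) of how the collected predictions could be scored.
accuracy = accuracy_score(results_df['actual_label'], results_df['merged_predicted_label'])
precision, recall, f1, _ = precision_recall_fscore_support(
    results_df['actual_label'],
    results_df['merged_predicted_label'],
    average='binary'
)
print(f"Accuracy: {accuracy:.3f}  Precision: {precision:.3f}  Recall: {recall:.3f}  F1: {f1:.3f}")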

# Optional: Print some example predictions
for i in range(min(5, len(results_df))):  # Adjust the range as needed
    sample = results_df.iloc[i]
    print(f"\nExample {i+1}:")
    print(f"Text: {sample['text']}")
    print(f"Actual Label: {'Likely (1)' if sample['actual_label'] == 1 else 'Unlikely (0)'}")
    print(f"Merged Model Prediction: {sample['merged_response']}")
```
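
`PeftModel` and `BitsAndBytesConfig` are imported in the script above but not exercised, since the published checkpoint already has the adapter merged in. For reference, here is a minimal sketch (not from the original card; the adapter repository id below is hypothetical) of how a LoRA adapter can be merged into the base model with `peft`:

```python
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Hypothetical adapter id; substitute the actual adapter repository if reproducing the merge.
ADAPTER_ID = "your-org/llama3.2-bias-adapter"

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",
    torch_dtype=torch.float16,
)
merged = PeftModel.from_pretrained(base, ADAPTER_ID).merge_and_unload()
merged.save_pretrained("merged_model")  # local output directory; name is arbitrary
```
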
## Dataset and Evaluation