dpaul93 committed
Commit 2c0321e
1 Parent(s): 90717a4

Create test_llm.py

Files changed (1)
  1. test_llm.py +23 -0
test_llm.py ADDED
@@ -0,0 +1,23 @@
+ from transformers import pipeline
+ from evaluate import load as load_metric
+
+ # Initialize the text generation model
+ generator = pipeline("text-generation", model="gpt2")
+
+ # Test 1: Check whether the generated text contains a hallucination
+ def test_hallucination():
+     prompt = "The capital of France is"
+     output = generator(prompt, max_length=20, num_return_sequences=1)[0]["generated_text"]
+     assert "Paris" in output, "Hallucination detected: expected 'Paris' in the output."
+
+ # Test 2: Evaluate the generated text against a reference using the BLEU score
+ def test_bleu_score():
+     bleu = load_metric("bleu")
+     reference = [["The capital of France is Paris"]]
+     hypothesis = generator("The capital of France is", max_length=20, num_return_sequences=1)[0]["generated_text"]
+     bleu_score = bleu.compute(predictions=[hypothesis], references=reference)
+     assert bleu_score["bleu"] > 0.5, f"Low BLEU score: {bleu_score['bleu']}"
+
+ if __name__ == "__main__":
+     test_hallucination()
+     test_bleu_score()
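
The file runs both checks directly under the __main__ block; because the functions follow the test_ naming convention, they can also be collected by a test runner. A minimal sketch of invoking them that way, assuming pytest is installed alongside transformers and evaluate:

# run_tests.py (hypothetical helper, not part of this commit)
# Collects and runs the test_ functions in test_llm.py via pytest.
import pytest

raise SystemExit(pytest.main(["-q", "test_llm.py"]))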