Spaces:

dpaul93
/

llmopscicd

Sleeping

dpaul93 commited on Oct 10, 2024

Commit

2c0321e

verified ·

1 Parent(s): 90717a4

Create test_llm.py

Files changed (1) hide show

test_llm.py ADDED Viewed

+from transformers import pipeline
+from evaluate import load as load_metric
+# Initialize the text generation model
+generator = pipeline("text-generation", model="gpt2")
+# Test 1: Checking if the generated text has hallucinations
+def test_hallucination():
+    prompt = "The capital of France is"
+    output = generator(prompt, max_length=20, num_return_sequences=1)[0]["generated_text"]
+    assert "Paris" in output, "Hallucination detected: Expected 'Paris' in the output."
+# Test 2: Evaluate the LLM using BLEU score
+def test_bleu_score():
+    bleu = load_metric("bleu")
+    reference = [["The", "capital", "of", "France", "is", "Paris"]]
+    hypothesis = generator("The capital of France is", max_length=20, num_return_sequences=1)[0]["generated_text"].split()
+    bleu_score = bleu.compute(predictions=[hypothesis], references=reference)
+    assert bleu_score["bleu"] > 0.5, f"Low BLEU score: {bleu_score['bleu']}"
+if __name__ == "__main__":
+    test_hallucination()
+    test_bleu_score()