Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
from evaluate import load as load_metric | |
# Initialize the text generation model | |
generator = pipeline("text-generation", model="gpt2") | |
# Test 1: Checking if the generated text has hallucinations | |
def test_hallucination(): | |
prompt = "The capital of France is" | |
output = generator(prompt, max_length=20, num_return_sequences=1)[0]["generated_text"] | |
assert "Paris" in output, "Hallucination detected: Expected 'Paris' in the output." | |
# Test 2: Evaluate the LLM using BLEU score | |
def test_bleu_score(): | |
bleu = load_metric("bleu") | |
reference = [["The", "capital", "of", "France", "is", "Paris"]] | |
hypothesis = generator("The capital of France is", max_length=20, num_return_sequences=1)[0]["generated_text"].split() | |
bleu_score = bleu.compute(predictions=[hypothesis], references=reference) | |
assert bleu_score["bleu"] > 0.5, f"Low BLEU score: {bleu_score['bleu']}" | |
if __name__ == "__main__": | |
test_hallucination() | |
test_bleu_score() |