File size: 2,263 Bytes
8cb47d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from datasets import load_dataset
import pandas as pd
# Prompt sent to the model for each premise/hypothesis pair; `{premise}` and
# `{hypothesis}` are filled in by `generate_prompts`.
PROMPT_TEMPLATE = """You are an advanced AI trained to understand and explain natural language relationships. I will give you a pair of sentences: a premise and a hypothesis. Your task is to determine the relationship between them and provide a detailed explanation of your reasoning process. The possible relationships are "Entailment," "Contradiction," or "Neutral."
Instructions:
Read the given premise and hypothesis carefully.
Identify the relationship between them based on the following definitions:
Entailment: The hypothesis logically follows from the premise.
Contradiction: The hypothesis directly contradicts the premise.
Neutral: The hypothesis neither logically follows from nor contradicts the premise.
Provide the relationship (Entailment, Contradiction, or Neutral).
Explain in about ten words your reasoning to justify your conclusion.
Example:
Premise: "A man is playing a guitar."
Hypothesis: "A man is making music."
Relationship: Entailment
Explanation: Playing guitar inherently involves creating music, fulfilling the hypothesis.
Now, try it with the following pair:
Premise: "{premise}"
Hypothesis: "{hypothesis}"
Relationship:
"""

# Integer class id in the `label` column -> answer string stored with each prompt.
_LABEL_NAMES = {0: 'Entailment', 1: 'Neutral', 2: 'Contradiction'}

# Column order of the frame returned by `get_data`.
PROMPT_COLUMNS = ['question', 'answer', 'reference_explanation']


def generate_prompts(df: pd.DataFrame) -> list:
    """Build one prompt record per row of *df*.

    Args:
        df: Frame with `premise`, `hypothesis`, `label` and `explanation_1`
            columns.

    Returns:
        A list of dicts with keys `question` (the filled-in prompt),
        `answer` (the label name) and `reference_explanation`, in row order.

    Raises:
        KeyError: If a column is missing or a label is not 0, 1 or 2.
    """
    # Iterate the four needed columns in lockstep instead of `iterrows()`,
    # which builds a full Series object for every row.
    return [
        {
            'question': PROMPT_TEMPLATE.format(premise=premise, hypothesis=hypothesis),
            'answer': _LABEL_NAMES[label],
            'reference_explanation': explanation,
        }
        for premise, hypothesis, label, explanation in zip(
            df['premise'], df['hypothesis'], df['label'], df['explanation_1']
        )
    ]


def get_data(sample_size: int) -> pd.DataFrame:
    """Return a reproducible sample of prompts built from the "esnli" train split.

    Loads the dataset with `load_dataset("esnli")`, drops rows whose
    `hypothesis` or `explanation_1` is missing, samples `sample_size` rows
    with a fixed seed (`random_state=42`) and turns each row into a prompt.

    Args:
        sample_size: Number of rows to sample.

    Returns:
        A DataFrame with columns `question`, `answer` and
        `reference_explanation`. The columns are present even when
        `sample_size` is 0.

    Raises:
        ValueError: Raised by `DataFrame.sample` when `sample_size` is
            negative or larger than the number of usable rows.
    """
    train_df = load_dataset("esnli")['train'].to_pandas()
    usable_df = train_df.dropna(subset=['hypothesis', 'explanation_1'])
    sample_df = usable_df.sample(n=sample_size, random_state=42)
    # Pin the columns so an empty sample still yields the expected schema.
    return pd.DataFrame(generate_prompts(sample_df), columns=PROMPT_COLUMNS)
if __name__ == '__main__':
    # Smoke run: build and print a small sample of prompts.
    sample_size = 5
    print(get_data(sample_size))