Serhan Yılmaz committed
Commit c1ff2ef · 1 Parent(s): ccde14a
Files changed (2)
  1. app.py +158 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,158 @@
+ import os
+ import logging
+ import json
+ import gradio as gr
+ import pandas as pd
+ from datasets import load_dataset
+ import random
+ from openai import OpenAI
+ from typing import Any, Dict
+ from dotenv import load_dotenv
+ from transformers import pipeline
+ import asyncio
+
+ # Import the required functions from the pipeline file
+ from pipeline_gradio_experimental import generate_basic_question, rank_questions_with_details
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize OpenAI client
+ client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
+ # Load the SQuAD dataset
+ dataset = load_dataset("squad")
+
+ # Initialize the question answering pipeline
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+
+ def get_random_entry():
+     random_index = random.randint(0, len(dataset['train']) - 1)
+     entry = dataset['train'][random_index]
+     return entry['context'], entry['answers']['text'][0], entry['question']
+
+ def generate_answer(context: str, question: str) -> str:
+     try:
+         result = qa_pipeline(question=question, context=context)
+         return result['answer']
+     except Exception as e:
+         logger.error(f"Error in generate_answer: {e}")
+         return "Failed to generate answer"
+
+ def compare_questions(context: str, original_answer: str, question1: str, answer1: str, question2: str, answer2: str) -> Dict[str, Any]:
+     try:
+         response = client.chat.completions.create(
+             model="gpt-4o-2024-08-06",
+             messages=[
+                 {"role": "system", "content": "You are an expert in evaluating question-answer pairs based on a given context."},
+                 {"role": "user", "content": f"""Compare the following two question-answer pairs based on the given context and original answer. Evaluate their quality and relevance.
+
+ Context: {context}
+ Original Answer: {original_answer}
+
+ Question 1: {question1}
+ Answer 1: {answer1}
+
+ Question 2: {question2}
+ Answer 2: {answer2}
+
+ Score each question-answer pair on a scale of 0 to 10 based on the quality and relevance of the question and answer. Provide an explanation for your evaluation. Focus on how well the new answer matches the old answer considering the context. Make sure to grade one higher than the other."""}
+             ],
+             response_format={
+                 "type": "json_schema",
+                 "json_schema": {
+                     "name": "question_comparison_evaluator",
+                     "strict": True,
+                     "schema": {
+                         "type": "object",
+                         "properties": {
+                             "question1_score": {"type": "number"},
+                             "question2_score": {"type": "number"},
+                             "explanation": {"type": "string"}
+                         },
+                         "required": ["question1_score", "question2_score", "explanation"],
+                         "additionalProperties": False
+                     }
+                 }
+             }
+         )
+         return json.loads(response.choices[0].message.content)
+     except Exception as e:
+         logger.error(f"Error in comparing questions: {e}")
+         return {"question1_score": 0, "question2_score": 0, "explanation": "Failed to compare questions"}
+
+ async def process_random_entry(progress=gr.Progress()):
+     context, original_answer, original_question = get_random_entry()
+
+     # Yield the original context, question, and answer immediately
+     yield context, original_question, original_answer, gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+
+     # Simulate some processing time
+     await asyncio.sleep(1)
+     progress(0.3, desc="Generating questions...")
+
+     basic_question = generate_basic_question(context, original_answer)
+     _, _, enhanced_question = rank_questions_with_details(context, original_answer)
+
+     await asyncio.sleep(1)
+     progress(0.6, desc="Generating answers...")
+
+     basic_answer = generate_answer(context, basic_question)
+     enhanced_answer = generate_answer(context, enhanced_question)
+
+     await asyncio.sleep(1)
+     progress(0.9, desc="Comparing questions...")
+
+     comparison_result = compare_questions(context, original_answer, basic_question, basic_answer, enhanced_question, enhanced_answer)
+
+     winner = "Basic" if comparison_result["question1_score"] > comparison_result["question2_score"] else "Enhanced"
+
+     # Yield the final results
+     yield (
+         context,
+         original_question,
+         original_answer,
+         gr.update(visible=True),
+         gr.update(visible=True, value=f"Question: {basic_question}\nAnswer: {basic_answer}"),
+         gr.update(visible=True, value=f"Question: {enhanced_question}\nAnswer: {enhanced_answer}"),
+         gr.update(visible=True, value=f"Question 1 Score: {comparison_result['question1_score']}\n"
+                   f"Question 2 Score: {comparison_result['question2_score']}\n"
+                   f"Explanation: {comparison_result['explanation']}\n"
+                   f"Winner: {winner} Generation")
+     )
+
+ # Create Gradio interface
+ with gr.Blocks(theme=gr.themes.Default()) as iface:
+     gr.Markdown("# Question Generation and Comparison")
+     gr.Markdown("Click the button to get a random entry from the SQuAD dataset and compare basic and enhanced question generation.")
+
+     random_button = gr.Button("Get Random Question")
+
+     with gr.Column(visible=False) as output_column:
+         context_output = gr.Textbox(label="Original Context")
+         original_question_output = gr.Textbox(label="Original Question")
+         original_answer_output = gr.Textbox(label="Original Answer")
+         basic_generation_output = gr.Textbox(label="Basic Generation", visible=False)
+         enhanced_generation_output = gr.Textbox(label="Enhanced Generation", visible=False)
+         comparison_result_output = gr.Textbox(label="Comparison Result", visible=False)
+
+     random_button.click(
+         fn=process_random_entry,
+         outputs=[
+             context_output,
+             original_question_output,
+             original_answer_output,
+             output_column,
+             basic_generation_output,
+             enhanced_generation_output,
+             comparison_result_output
+         ]
+     )
+
+ # Launch the app
+ if __name__ == "__main__":
+     iface.launch()
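The piece worth studying in app.py is process_random_entry: it is an async generator, so its first yield shows the original context, question, and answer immediately while generation keeps running, and gr.Progress drives the status text. Below is a stripped-down sketch of that pattern (it assumes a recent Gradio release with async-generator event handlers; slow_task and the component names are illustrative, not from this repo):

import asyncio
import gradio as gr

# An async generator event handler: each yield pushes an update to the UI.
async def slow_task(progress=gr.Progress()):
    yield "Started..."                # shown immediately
    progress(0.5, desc="Working...")  # updates the progress indicator
    await asyncio.sleep(1)
    yield "Done."                     # final value replaces the first

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    out = gr.Textbox(label="Status")
    btn.click(fn=slow_task, outputs=out)

if __name__ == "__main__":
    demo.launch()

Note that each yield must supply a value for every declared output, which is why app.py yields full seven-element tuples both times.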
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio
+ openai
+ numpy
+ sentence-transformers
+ transformers
+ python-dotenv
+ pandas
+ datasets
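To try the Space locally (a sketch; it assumes pipeline_gradio_experimental.py from this repo is present next to app.py and that OPENAI_API_KEY is set in a .env file): run pip install -r requirements.txt, then python app.py. The transformers question-answering pipeline also needs a backend such as torch, which requirements.txt does not pin.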