Spaces:
Runtime error
Runtime error
Marcus Posey
committed on
Commit
·
e68ea89
1
Parent(s):
591cea8
Add application file
Browse files- app.py +277 -0
- prompt_template.txt +34 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import httpx
|
5 |
+
from gradio_client import Client
|
6 |
+
from urllib.parse import urljoin
|
7 |
+
import gspread
|
8 |
+
from oauth2client.service_account import ServiceAccountCredentials
|
9 |
+
import gradio as gr
|
10 |
+
from huggingface_hub import login
|
11 |
+
|
12 |
+
|
13 |
+
# Dropdown label -> context prefix that is placed in front of the user's
# question. Rows are (dropdown label, title as quoted in the prefix, author,
# year); the quoted title is listed separately because its capitalisation is
# not always the same as the label's.
_BOOK_ROWS = (
    ("Spaceman on a Spree", "Spaceman on a Spree", "Mack Reynolds", 1961),
    ("Charity Case", "Charity Case", "Jim Harmon", 1972),
    ("A Gift from Earth", "A Gift From Earth", "Manly Banister", 1950),
    ("Pick a Crime", "Pick a Crime", "Richard Rein Smith", 1970),
    ("Dangerous Quarry", "Dangerous Quarry", "Jim Harmon", 1972),
)
BOOK_MAPPING = {
    label: f'In the context of "{title}", written by {author} in {year},'
    for label, title, author, year in _BOOK_ROWS
}

# Dropdown label -> lower-case category identifier.
CATEGORY_MAPPING = {
    label: label.lower()
    for label in ("Character", "Relationship", "Plot", "Numerical")
}

# Names of the model variants being compared; shuffled per book by
# ModelManager.get_book_model_mapping.
MODEL_VARIANTS = ["rephrase", "rephrase_summarize", "entigraph"]

# Most recent response per model key. Module-level, so every caller in the
# process reads and writes the same dict.
model_responses = dict.fromkeys(("Model_A", "Model_B", "Model_C"), "")
|
31 |
+
|
32 |
+
|
33 |
+
class ModelManager:
    """Wrap the three remote model clients and the per-book slot shuffle.

    Attributes:
        book_model_assignments: book -> {"Model A"/"Model B"/"Model C": variant
            name}, filled lazily by ``get_book_model_mapping``.
        model_A, model_B, model_C: ``gradio_client.Client`` handles for the
            rephrase, rephrase-summarize and entigraph Spaces respectively.
        template_text: contents of ``prompt_template.txt``; formatted with the
            question before every request.
    """

    def __init__(self):
        self.book_model_assignments = {}
        self.model_A = Client("mep296/llama-3-8b-rephrase-quality")
        self.model_B = Client("mep296/llama-3-8b-rephrase-summarize-quality")
        self.model_C = Client("mep296/llama-3-8b-entigraph-quality")
        self.template_text = self._load_template()

    def _load_template(self):
        """Read ``prompt_template.txt`` (UTF-8) and return its text."""
        with open("prompt_template.txt", "r", encoding="utf-8") as handle:
            template = handle.read()
        return template

    def get_model_response(self, model_name, prompt):
        """Send ``prompt`` to one model and return its reply.

        Args:
            model_name: "Model_A", "Model_B" or "Model_C".
            prompt: question text substituted into the prompt template.

        Returns:
            The value returned by the client's ``/predict`` endpoint, or an
            "Error: ..." string when the call fails. Failures are printed and
            never propagate to the caller.
        """
        try:
            filled_template = self.template_text.format(prompt)
            clients_by_name = {
                f"Model_{suffix}": getattr(self, f"model_{suffix}")
                for suffix in "ABC"
            }
            return clients_by_name[model_name].predict(
                prompt=filled_template,
                api_name="/predict",
            )
        except (httpx.ReadTimeout, httpx.ConnectTimeout) as exc:
            print(f"Timeout while getting response from {model_name}: {str(exc)}")
            return f"Error: Model {model_name} timed out. Please try again."
        except Exception as exc:
            # Deliberately broad: any failure becomes a visible error string.
            print(f"Error getting response from {model_name}: {str(exc)}")
            return f"Error: Could not get response from {model_name}. Please try again."

    def get_all_model_responses(self, prompt):
        """Query models A, B and C in turn; return their replies in that order.

        Each reply is also stored in the module-level ``model_responses`` dict
        under its model key.
        """
        collected = []
        for key in ("Model_A", "Model_B", "Model_C"):
            answer = self.get_model_response(key, prompt)
            model_responses[key] = answer
            collected.append(answer)
        return collected

    def get_book_model_mapping(self, book):
        """Return the slot -> variant assignment for ``book``.

        The first request for a book draws a random ordering of
        ``MODEL_VARIANTS``; later requests return the stored assignment.
        """
        assignments = self.book_model_assignments
        if book in assignments:
            return assignments[book]
        order = random.sample(MODEL_VARIANTS, len(MODEL_VARIANTS))
        assignments[book] = dict(zip(("Model A", "Model B", "Model C"), order))
        return assignments[book]
|
85 |
+
|
86 |
+
|
87 |
+
class SheetManager:
    """Append rating rows to the "Model Blind Comparison Ratings" spreadsheet."""

    def __init__(self):
        """Authorise with Google and open the spreadsheet's first worksheet.

        Credentials are read from the module-level ``variables_keys`` dict,
        which is assigned in the ``__main__`` section of this file.
        """
        scope = [
            "https://spreadsheets.google.com/feeds",
            "https://www.googleapis.com/auth/drive",
        ]
        creds = ServiceAccountCredentials.from_json_keyfile_dict(variables_keys, scope)
        workbook = gspread.authorize(creds).open("Model Blind Comparison Ratings")
        self.sheet = workbook.sheet1

    def append_rating(self, rating_data):
        """Append one row built from ``rating_data``.

        Args:
            rating_data: mapping that contains every key listed in ``columns``
                below; a missing key raises ``KeyError``.
        """
        # Column order of the sheet.
        columns = (
            "book",
            "category",
            "prompt",
            "rephrase_rating",
            "rephrase_summarize_rating",
            "entigraph_rating",
            "rephrase_response",
            "rephrase_summarize_response",
            "entigraph_response",
        )
        self.sheet.append_row([rating_data[column] for column in columns])
|
106 |
+
|
107 |
+
|
108 |
+
class ModelComparisonApp:
    """Gradio UI for blind, side-by-side rating of three model variants.

    The user picks a book and a category, asks a question, sees three
    anonymous answers ("Model A/B/C") and rates each from 1 to 5. Ratings are
    appended to a Google Sheet through ``SheetManager``.

    Which variant sits behind which slot is decided per book by
    ``ModelManager.get_book_model_mapping``. ``respond`` routes each slot to
    the client of the variant assigned to it, so that ``get_votes`` (which
    uses the same mapping) records every rating and response under the
    variant that actually produced it.

    NOTE(review): ``model_responses`` and the ``selected_book*`` attributes are
    shared by every browser session served by this process.
    """

    # ModelManager client key that serves each variant (matches the Space each
    # client is constructed with in ModelManager.__init__).
    _CLIENT_FOR_VARIANT = {
        "rephrase": "Model_A",
        "rephrase_summarize": "Model_B",
        "entigraph": "Model_C",
    }
    # Display slots, in left-to-right order.
    _SLOTS = ("A", "B", "C")

    def __init__(self):
        self.model_manager = ModelManager()
        self.sheet_manager = SheetManager()
        self.votes = []
        self.selected_book = "Spaceman on a Spree"
        self.selected_book_string = BOOK_MAPPING[self.selected_book]
        self.selected_category_string = ""
        self.chat_history_A = []
        self.chat_history_B = []
        self.chat_history_C = []
        # Assigned in create_interface(): the State is used as an event output,
        # so it is created inside the Blocks context that owns that event.
        self.state = None

    def create_interface(self):
        """Build the Blocks layout, wire the events and return the demo."""
        text_size = gr.themes.sizes.text_lg
        with gr.Blocks(theme=gr.themes.Default(text_size=text_size), fill_width=True) as demo:
            self.state = gr.State(value="")
            gr.Markdown("# Model Blind Comparison")

            # NOTE(review): the nesting of the rows below was reconstructed
            # from a listing without indentation; confirm against the layout
            # that is expected on screen.
            with gr.Group():
                with gr.Row():
                    chat_interfaces = self._create_chat_interfaces()

                with gr.Row():
                    ratings = self._create_rating_sliders()

            with gr.Row(equal_height=True):
                submit_button = gr.Button(value="⭐ Submit Ratings", interactive=False)
                submission_status = gr.Textbox(label="Submission Status", interactive=False)

            with gr.Row(equal_height=True):
                input_elements = self._create_input_elements()

            self._setup_event_handlers(demo, chat_interfaces, ratings, submit_button, submission_status, input_elements)

        return demo

    def _create_chat_interfaces(self):
        """Return {"A"/"B"/"C": gr.Chatbot}, one chat pane per slot."""
        return {
            slot: gr.Chatbot(
                getattr(self, f"chat_history_{slot}"),
                type="messages",
                label=f"Model {slot}",
                height=650,
                show_copy_button=True,
            )
            for slot in self._SLOTS
        }

    def _create_rating_sliders(self):
        """Return {"1"/"2"/"3": gr.Slider} for responses A, B and C."""
        return {
            str(position): gr.Slider(
                1, 5, step=1, label=f"Rate Response {slot}",
                interactive=True, value=3,
            )
            for position, slot in enumerate(self._SLOTS, start=1)
        }

    def _create_input_elements(self):
        """Return the book/category dropdowns, question box and send button."""
        return {
            'book': gr.Dropdown(choices=list(BOOK_MAPPING.keys()),
                                label="Select a Book", interactive=True, scale=1),
            'category': gr.Dropdown(choices=list(CATEGORY_MAPPING.keys()),
                                    label="Select a Question Category", interactive=True, scale=1),
            'question': gr.Textbox(label="Type a Question", max_lines=1,
                                   placeholder="e.g. What is the relationship between characters?",
                                   interactive=True, scale=2),
            'send': gr.Button("Send", scale=0, variant="primary", interactive=False),
        }

    def respond(self, message, chat_A, chat_B, chat_C, book=None):
        """Ask all three variants ``message`` and return one chat per slot.

        Args:
            message: the question typed by the user.
            chat_A, chat_B, chat_C: current chat histories; accepted because
                the chatbots are wired as inputs, but not read.
            book: dropdown label of the selected book. When omitted, the book
                last stored by the dropdown change handler is used.

        Returns:
            A list of three chats (slot A, B, C), each made of the user prompt
            followed by the assistant reply.

        Raises:
            gr.Error: if ``message`` is empty or whitespace only.
        """
        if not (message or "").strip():
            raise gr.Error("Message cannot be empty!")

        if book is None:
            book = self.selected_book
            context = self.selected_book_string
        else:
            context = BOOK_MAPPING.get(book, "")
        prompt = f"{context} {message}"

        # Route each display slot to the variant the per-book shuffle assigned
        # to it; get_votes attributes ratings with this same mapping.
        mapping = self.model_manager.get_book_model_mapping(book)
        chats = []
        for slot in self._SLOTS:
            client_name = self._CLIENT_FOR_VARIANT[mapping[f"Model {slot}"]]
            response = self.model_manager.get_model_response(client_name, prompt)
            # Keyed by display slot, which is how get_votes looks it up.
            model_responses[f"Model_{slot}"] = response
            chats.append([
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": response},
            ])
        return chats

    def get_votes(self, book, category, question, rating_A, rating_B, rating_C):
        """Record the three ratings under their variant names.

        Args:
            book, category, question: current values of the input widgets.
            rating_A, rating_B, rating_C: slider values for slots A, B and C.

        Returns:
            ``(status message, update disabling the submit button)``.
        """
        model_mapping = self.model_manager.get_book_model_mapping(book)
        # Invert slot -> variant; the label's last character is the slot letter.
        slot_for_variant = {variant: label[-1] for label, variant in model_mapping.items()}
        ratings = {"A": rating_A, "B": rating_B, "C": rating_C}

        rating_data = {"book": book, "category": category, "prompt": question}
        for variant in MODEL_VARIANTS:
            rating_data[f"{variant}_rating"] = ratings[slot_for_variant[variant]]
        for variant in MODEL_VARIANTS:
            rating_data[f"{variant}_response"] = model_responses[f"Model_{slot_for_variant[variant]}"]

        self.votes.append(rating_data)
        self.sheet_manager.append_rating(rating_data)
        return ("Ratings submitted successfully!", gr.update(interactive=False))

    def _setup_event_handlers(self, demo, chat_interfaces, ratings, submit_button, submission_status, input_elements):
        """Attach all event listeners; must run inside the Blocks context."""

        def enable_send_btn(book, category, question):
            # Send is available only once all three inputs are filled in.
            return gr.update(interactive=bool(book and category and question))

        def enable_button_group(model_A, model_B, model_C):
            # Submit is available only once every chat pane has content.
            return gr.update(interactive=bool(model_A and model_B and model_C))

        def update_selected_book(book_selection):
            self.selected_book = book_selection
            self.selected_book_string = BOOK_MAPPING.get(book_selection, "")
            return self.selected_book_string

        gating_inputs = [input_elements['book'], input_elements['category'], input_elements['question']]
        for component in gating_inputs:
            component.change(
                enable_send_btn,
                inputs=gating_inputs,
                outputs=[input_elements['send']],
            )

        input_elements['book'].change(
            update_selected_book,
            inputs=[input_elements['book']],
            outputs=[self.state],
        )

        submit_button.click(
            self.get_votes,
            inputs=gating_inputs + [ratings['1'], ratings['2'], ratings['3']],
            outputs=[submission_status, submit_button],
        )

        chatbots = list(chat_interfaces.values())
        input_elements['send'].click(
            self.respond,
            # The book is passed explicitly so the prompt and the slot routing
            # use this session's selection.
            inputs=[input_elements['question']] + chatbots + [input_elements['book']],
            outputs=chatbots,
        )

        for chatbot in chatbots:
            chatbot.change(
                enable_button_group,
                inputs=chatbots,
                outputs=[submit_button],
            )
|
258 |
+
|
259 |
+
if __name__ == "__main__":
    # Service-account secrets are supplied through the environment.
    PRIVATE_KEY = os.getenv('PRIVATE_KEY')
    PRIVATE_KEY_ID = os.getenv('PRIVATE_KEY_ID')
    if not PRIVATE_KEY:
        # Fail here with a clear message instead of deep inside credential parsing.
        raise RuntimeError(
            "PRIVATE_KEY environment variable is not set; "
            "cannot authenticate with Google Sheets."
        )

    # Read as a module global by SheetManager.__init__.
    variables_keys = {
        "type": "service_account",
        "project_id": "summer-presence-450117-r7",
        # Prefer the configured key id; fall back to the id this file shipped with.
        "private_key_id": PRIVATE_KEY_ID or "427fe03954113ce7174febe50871c7beba0384cc",
        # Secrets stored as a single line carry literal "\n" sequences; a PEM
        # key never contains a backslash, so turning them into newlines is safe.
        "private_key": PRIVATE_KEY.replace("\\n", "\n"),
        "client_email": "model-blind-comparison@summer-presence-450117-r7.iam.gserviceaccount.com",
        "client_id": "117681363507032419648",
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/model-blind-comparison%40summer-presence-450117-r7.iam.gserviceaccount.com",
        "universe_domain": "googleapis.com"
    }
    app = ModelComparisonApp()
    demo = app.create_interface()
    demo.launch()
|
prompt_template.txt
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Example 1
|
2 |
+
### Question
|
3 |
+
In the context of "Les Misérables", written by Victor Hugo in 1862, what is the main setting of the novel?
|
4 |
+
### Thought Process and Answer
|
5 |
+
In "Les Misérables" by Victor Hugo, the main setting of the novel is early 19th-century France, with a particular focus on Paris. The story begins in smaller locations like the town of Digne, where Jean Valjean encounters Bishop Myriel, and Montreuil-sur-Mer, where Valjean becomes a factory owner and mayor. However, as the narrative progresses, Paris becomes the central backdrop for much of the action. Paris's streets, slums, and sewers set the stage for pivotal events, including the June Rebellion of 1832.
|
6 |
+
|
7 |
+
## Example 2
|
8 |
+
### Question
|
9 |
+
In the context of "Brave New World", written by Aldous Huxley in 1932, what substance is widely used in the society to control citizens' happiness?
|
10 |
+
### Thought Process and Answer
|
11 |
+
In "Brave New World" by Aldous Huxley, the substance widely used to control citizens' happiness is soma. This drug is a central tool of social control in the World State, designed to keep the population docile and content. Soma provides an escape from negative emotions and uncomfortable thoughts, allowing individuals to maintain a superficial sense of happiness without addressing deeper issues. Its use reflects the society's prioritization of stability and order over individual freedom and emotional authenticity. By relying on soma, the World State ensures compliance and suppresses dissent, highlighting the dystopian theme of sacrificing humanity for societal control.
|
12 |
+
|
13 |
+
## Example 3
|
14 |
+
### Question
|
15 |
+
In the context of "Romeo and Juliet", written by William Shakespeare in the early 1590s, what are the names of the two feuding families?
|
16 |
+
### Thought Process and Answer
|
17 |
+
In "Romeo and Juliet" by William Shakespeare, the two feuding families are the Montagues and the Capulets. These families are central to the conflict in the play, and their animosity drives the tragic events that unfold. Romeo Montague falls in love with Juliet Capulet, despite the longstanding hatred between their families. This rivalry between the Montagues and Capulets ultimately leads to the death of both Romeo and Juliet, making it a key theme of the play: the destructive nature of familial conflict and the way it affects the lives of individuals caught in its web.
|
18 |
+
|
19 |
+
## Example 4
|
20 |
+
### Question
|
21 |
+
In the context of "1984", written by George Orwell in 1949, what is the name of the totalitarian leader?
|
22 |
+
### Thought Process and Answer
|
23 |
+
In "1984" by George Orwell, the name of the totalitarian leader is Big Brother. Although Big Brother is not an actual person in the traditional sense, he is the symbolic figurehead of the Party's authoritarian rule over Oceania. His image is omnipresent, with posters of his face and the slogan "Big Brother is watching you" serving as constant reminders to citizens of the Party's surveillance and control. Big Brother represents the Party's totalitarian control over every aspect of people's lives, and the character is central to the novel’s themes of oppression, surveillance, and the loss of personal freedom.
|
24 |
+
|
25 |
+
## Example 5
|
26 |
+
### Question
|
27 |
+
In the context of "Pride and Prejudice", written by Jane Austen in 1813, what is the relationship between Elizabeth Bennet and Mr. Darcy?
|
28 |
+
### Thought Process and Answer
|
29 |
+
In "Pride and Prejudice" by Jane Austen, the relationship between Elizabeth Bennet and Mr. Darcy evolves from initial dislike to mutual respect and love. At the start, Elizabeth holds a strong prejudice against Mr. Darcy, due to his haughty behavior and his slighting remarks about her at their first meeting. Mr. Darcy, on the other hand, is initially attracted to Elizabeth but is put off by her lower social standing. However, as the story progresses, Elizabeth learns of Darcy's true character, particularly his role in helping her family, while Darcy comes to realize that his pride has led him to misjudge Elizabeth and her family. By the end of the novel, their relationship transforms into a deep, affectionate love, grounded in understanding and mutual admiration, marking a central theme of the novel: the overcoming of pride and prejudice in relationships.
|
30 |
+
|
31 |
+
## Example 6
|
32 |
+
### Question
|
33 |
+
{}
|
34 |
+
### Thought Process and Answer
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==5.15.0
|
2 |
+
gradio_client==1.7.0
|
3 |
+
gspread==6.1.4
|
4 |
+
oauth2client==4.1.3
|