feat: push
Browse files
app.py
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from datetime import datetime
|
4 |
+
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
+
import time
|
7 |
+
import random
|
8 |
+
import uuid
|
9 |
+
|
10 |
+
def save_uploaded_files(files, session_id):
    """Copy uploaded files into the telemetry directory for this session.

    Args:
        files: Iterable of uploaded file objects (each exposing a ``.name``
            path attribute, as Gradio's File component provides), or None.
        session_id: Unique identifier used as the per-session subdirectory
            under ``telemetry_files/``.

    Returns:
        List of destination paths for the files that were actually saved
        (empty when *files* is None or contains only None entries).
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    # Guard against a missing upload list (Gradio passes None when nothing
    # was uploaded) as well as None entries within the list.
    for file in files or []:
        if file is not None:
            filename = os.path.basename(file.name)
            save_path = os.path.join(save_dir, filename)
            # copy2 preserves file metadata (timestamps) along with contents.
            shutil.copy2(file.name, save_path)
            saved_paths.append(save_path)

    return saved_paths
|
24 |
+
|
25 |
+
def mock_process_documents(files, chunk_size, num_questions, question_types, complexity_types,
                           difficulty, selected_models, *, delay=5.0):
    """Simulate document processing and return a DataFrame of mock Q&A rows.

    Args:
        files: Uploaded file objects to persist for telemetry.
        chunk_size: Token chunk size (unused by the mock; kept for API parity
            with the eventual real pipeline).
        num_questions: Number of question rows to generate.
        question_types: List of selected question-type labels.
        complexity_types: List of selected complexity labels.
        difficulty: Average difficulty value echoed into the generated text.
        selected_models: List of selected model names.
        delay: Seconds to sleep to simulate processing. Keyword-only; defaults
            to the original 5-second pause, settable to 0 for fast tests.

    Returns:
        pandas.DataFrame with columns question_type, complexity, question,
        answer, model, difficulty.
    """
    time.sleep(delay)  # Simulate processing latency.

    # Persist the uploads under a unique, timestamped session directory.
    session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    saved_files = save_uploaded_files(files, session_id)

    data = []
    for i in range(num_questions):
        # question_types etc. are lists of selected values; sample one of each
        # per row so the mock output covers the chosen categories.
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)

        question = f"[{complexity}] Sample {question_type} question {i+1} (Difficulty: {difficulty:.1f}, Model: {model})"
        answer = f"This is a sample answer for question {i+1}. Files processed: {', '.join(saved_files)}"
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": question,
            "answer": answer,
            "model": model,
            "difficulty": difficulty
        })

    return pd.DataFrame(data)
|
53 |
+
|
54 |
+
def generate_csv_file(df, session_id):
    """Write *df* to ``telemetry_files/<session_id>/results.csv``.

    Args:
        df: Results DataFrame to persist.
        session_id: Per-session subdirectory name under ``telemetry_files``.

    Returns:
        The path of the written CSV file, or None when *df* has no rows.
    """
    if df.empty:
        return None

    # Make sure the per-session directory exists before writing.
    target_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(target_dir, exist_ok=True)

    destination = os.path.join(target_dir, "results.csv")
    df.to_csv(destination, index=False)
    return destination
|
67 |
+
|
68 |
+
def process_files(
    input_files, chunk_size, num_questions,
    question_types_dict, complexity_types_dict,
    difficulty_level, model_selection_dict
):
    """Validate inputs, run the mock generation, and package outputs for the UI.

    Args:
        input_files: Uploaded file objects from the File component (or None).
        chunk_size: Token chunk size slider value.
        num_questions: Number of questions slider value.
        question_types_dict: Selected question-type labels (list, despite the
            historical ``_dict`` suffix — CheckboxGroup yields a list).
        complexity_types_dict: Selected complexity labels (list).
        difficulty_level: Average difficulty slider value.
        model_selection_dict: Selected model names (list).

    Returns:
        Tuple of (results DataFrame, status message, CSV path or None),
        matching the Gradio outputs [output_table, output_status, csv_output].
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    # CheckboxGroup components already deliver lists of selected values, so
    # they can be validated and forwarded directly — no conversion needed.
    if not question_types_dict or not complexity_types_dict or not model_selection_dict:
        return pd.DataFrame(), "Error: Please select at least one option from each category", None

    start_time = time.time()
    results_df = mock_process_documents(
        input_files, chunk_size, num_questions,
        question_types_dict, complexity_types_dict,
        difficulty_level, model_selection_dict
    )
    processing_time = time.time() - start_time

    # Persist the results CSV under a fresh session id for the download widget.
    session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    csv_path = generate_csv_file(results_df, session_id)

    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path  # already None when results_df was empty
    )
|
102 |
+
|
103 |
+
# Create custom theme
# Light-touch customization of Gradio's Base theme: blue/indigo palette with
# slate neutrals, the Inter font, small corner radii, and explicit light/dark
# fills for the page body, inputs, and primary buttons.
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
).set(
    # "*token" strings reference theme palette variables, not literal colors;
    # the *_dark variants apply only in dark mode.
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    block_label_text_weight="600",
    block_title_text_weight="600",
    input_background_fill="white",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_200",
    input_border_color_dark="*neutral_700",
)
|
124 |
+
|
125 |
+
# Create the Gradio interface
# Layout: a two-column row (configuration on the left, results on the right),
# followed by collapsible instructions, a citation block, API usage notes, and
# the single click handler that wires the form to process_files.
# NOTE(review): the emoji in the markdown headings below appear mojibake-
# garbled in this copy ("π" etc.) — presumably they were emoji originally;
# confirm against the deployed app before editing these strings.
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
    .gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
    .contain { display: flex; flex-direction: column; }
    .contain > * { flex: 1}
    .gap { margin-top: 1rem !important }
    footer {display: none !important}
    .citation-box {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin-top: 2rem;
        font-family: monospace;
    }
    .citation-box pre {
        margin: 0;
        white-space: pre-wrap;
    }
    .main-panel { min-height: 600px }
    .output-panel { min-height: 400px }
    .checkbox-group { max-height: 200px; overflow-y: auto }
    .model-select { max-height: 150px }
    .download-btn { margin-top: 1rem !important }
    """
) as demo:
    # Header with description
    gr.Markdown("""
    # π Yourbench: Dynamic Question Generation Tool

    Generate high-quality questions and answers from your documents using state-of-the-art language models.
    This tool helps create diverse question types with varying complexity levels, perfect for educational
    assessment and content understanding.
    """)

    with gr.Row():
        # Left column for configuration
        with gr.Column(scale=2, elem_classes="main-panel"):
            # Document Upload Section
            with gr.Group():
                gr.Markdown("### π Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )

            # Core Parameters Section
            with gr.Group():
                gr.Markdown("### βοΈ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )

                # Fractional step so users can express an "average" difficulty.
                difficulty_level = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=0.1,
                    label="Average Difficulty",
                    info="1: Easy, 5: Very Hard",
                    elem_id="difficulty"
                )

            with gr.Row():
                # Question Types Section
                with gr.Column():
                    gr.Markdown("### π― Question Types")
                    # CheckboxGroup yields a LIST of the selected choice
                    # strings (the "_dict" suffix is historical).
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical", "Application Based", "Conceptual",
                            "Counterfactual", "Factual", "Open Ended",
                            "True False", "False Premise", "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )

                # Complexity and Models Section
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### π Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )

                    with gr.Group():
                        gr.Markdown("### π€ Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )

            process_btn = gr.Button(
                "π Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )

        # Right column for outputs
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### π Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                # Headers match the columns produced by process_files.
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                # Non-interactive: populated programmatically with the CSV path.
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )

    # Instructions Section
    with gr.Accordion("π Instructions", open=False):
        gr.Markdown("""
        1. **Upload Documents**: Support for .txt and .pdf files
        2. **Configure Parameters**:
           - Set chunk size for document processing
           - Choose number of questions to generate
           - Adjust difficulty level (1: Easy to 5: Very Hard)
        3. **Select Question Types**: Choose from various question categories
        4. **Set Complexity**: Single-shot or multi-hop reasoning
        5. **Choose Models**: Select AI models for ensemble generation
        6. Click 'π Generate Questions' to start
        7. Download results as CSV for further use
        """)

    # Citation Section
    gr.Markdown("""
    ### π Citation
    If you find this work helpful in your research or applications, please cite:
    """)

    with gr.Group(elem_classes="citation-box"):
        gr.Markdown("""```bibtex
    @misc{yourbench2024,
        title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
        author={Your Team},
        year={2024},
        publisher={GitHub},
        journal={GitHub repository},
        howpublished={\\url{https://github.com/yourbench/yourbench}},
    }
    ```""")

    # API Information
    gr.Markdown("""
    ### π API Usage

    This tool can be used programmatically through its API. Here's how to interact with it:

    ```python
    import gradio_client

    client = gradio_client.Client("YOUR_SPACE_URL")

    result = client.predict(
        ["document.pdf"],  # Input files
        500,  # Chunk size
        5,  # Number of questions
        ["Analytical", "Factual"],  # Question types
        ["Single Shot"],  # Complexity types
        3.0,  # Difficulty level
        ["GPT-4", "Claude 3.5 Sonnet"],  # Models
        api_name="/predict"
    )
    ```

    Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
    as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
    """)

    # Event handler
    # Input order must match process_files' parameter order; outputs map to
    # (DataFrame, status string, CSV path) respectively.
    process_btn.click(
        process_files,
        inputs=[
            input_files, chunk_size, num_questions,
            question_types_dict, complexity_types_dict,
            difficulty_level, model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output]
    )
|
353 |
+
|
354 |
+
if __name__ == "__main__":
    # NOTE(review): share=True opens a public Gradio tunnel link every launch —
    # presumably intended for demo sharing; confirm this is desired before
    # deploying anywhere a public URL would be a concern.
    demo.launch(share=True)
|
356 |
+
|