sumuks committed on
Commit
09c48e8
·
1 Parent(s): 3ce60dd

feat: push

Browse files
Files changed (1) hide show
  1. app.py +356 -0
app.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from datetime import datetime
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import time
7
+ import random
8
+ import uuid
9
+
10
def save_uploaded_files(files, session_id):
    """Copy uploaded files into ``telemetry_files/<session_id>/``.

    Args:
        files: Iterable of uploads. Each entry may be an object exposing its
            source path as ``.name``, a plain path (``str`` or
            ``os.PathLike``), or ``None`` (skipped). ``None`` for the whole
            argument is treated as "no files".
        session_id: Name of the sub-directory that receives the copies.

    Returns:
        The destination paths, in the order the uploads were given.
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    for file in files or []:
        if file is None:
            continue

        # Check for path-like values first: pathlib objects also have a
        # ``.name`` attribute, but it holds only the basename.
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
        else:
            source_path = file.name

        stem, extension = os.path.splitext(os.path.basename(source_path))
        save_path = os.path.join(save_dir, stem + extension)

        # Two uploads may share a basename; add a numeric suffix rather than
        # silently overwriting the earlier copy.
        duplicate_index = 1
        while os.path.exists(save_path):
            save_path = os.path.join(save_dir, f"{stem}_{duplicate_index}{extension}")
            duplicate_index += 1

        shutil.copy2(source_path, save_path)
        saved_paths.append(save_path)

    return saved_paths
24
+
25
def _new_session_id():
    """Return a unique session identifier: timestamp plus a short random hex tag."""
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"


def mock_process_documents(files, chunk_size, num_questions, question_types, complexity_types,
                           difficulty, selected_models, session_id=None):
    """Simulate document processing and return placeholder question/answer rows.

    Args:
        files: Uploaded files; they are copied via ``save_uploaded_files``.
        chunk_size: Accepted for interface parity; the mock does not use it.
        num_questions: Number of rows to generate (converted with ``int``).
        question_types: Non-empty list of question type labels to sample from.
        complexity_types: Non-empty list of complexity labels to sample from.
        difficulty: Numeric difficulty, echoed into each row.
        selected_models: Non-empty list of model names to sample from.
        session_id: Directory name under ``telemetry_files`` for the uploads.
            When omitted, a fresh ID is generated (the original behaviour).

    Returns:
        A ``pandas.DataFrame`` with the columns ``question_type``,
        ``complexity``, ``question``, ``answer``, ``model`` and ``difficulty``.
    """
    time.sleep(5)  # Simulate 5 seconds of processing

    if session_id is None:
        session_id = _new_session_id()
    saved_files = save_uploaded_files(files, session_id)

    # The value may arrive as a float (e.g. 5.0), and range() needs an int.
    question_count = int(num_questions)
    processed_files = ', '.join(saved_files)

    data = []
    for number in range(1, question_count + 1):
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)

        question = f"[{complexity}] Sample {question_type} question {number} (Difficulty: {difficulty:.1f}, Model: {model})"
        answer = f"This is a sample answer for question {number}. Files processed: {processed_files}"
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": question,
            "answer": answer,
            "model": model,
            "difficulty": difficulty,
        })

    return pd.DataFrame(data)


def generate_csv_file(df, session_id):
    """Write ``df`` to ``telemetry_files/<session_id>/results.csv``.

    Args:
        df: Results table to export.
        session_id: Name of the session sub-directory.

    Returns:
        The path of the written CSV, or ``None`` when there is nothing to
        write (``df`` is ``None`` or empty); no directory is created then.
    """
    if df is None or df.empty:
        return None

    session_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(session_dir, exist_ok=True)

    csv_path = os.path.join(session_dir, "results.csv")
    df.to_csv(csv_path, index=False)
    return csv_path


def process_files(
    input_files, chunk_size, num_questions,
    question_types_dict, complexity_types_dict,
    difficulty_level, model_selection_dict
):
    """Validate the request, run the (mock) pipeline and export the results.

    Despite the ``_dict`` suffixes, the three selection arguments are used
    as plain lists of the selected labels.

    Returns:
        A ``(results_df, status_message, csv_path)`` tuple. On a validation
        error the DataFrame is empty and ``csv_path`` is ``None``.
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    if not (question_types_dict and complexity_types_dict and model_selection_dict):
        return pd.DataFrame(), "Error: Please select at least one option from each category", None

    # One session ID per request, so the uploaded files and results.csv end up
    # in the same telemetry directory.
    session_id = _new_session_id()

    # perf_counter is monotonic, so the elapsed time cannot go negative if the
    # wall clock is adjusted mid-run.
    start_time = time.perf_counter()
    results_df = mock_process_documents(
        input_files, chunk_size, num_questions,
        question_types_dict, complexity_types_dict,
        difficulty_level, model_selection_dict,
        session_id=session_id,
    )
    processing_time = time.perf_counter() - start_time

    csv_path = generate_csv_file(results_df, session_id)

    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path,
    )
102
+
103
# Custom theme, built in two steps: the base palette and typography first,
# then the per-component colour and weight overrides.
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
)
theme = theme.set(
    # Page background (light / dark mode)
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    # Primary button
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    # Block labels and titles
    block_label_text_weight="600",
    block_title_text_weight="600",
    # Input fields (light / dark mode)
    input_background_fill="white",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_200",
    input_border_color_dark="*neutral_700",
)
124
+
125
# Create the Gradio interface.
# Layout: a header, a two-column row (configuration on the left, results on
# the right), then collapsible instructions, citation and API usage notes.
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
    .gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
    .contain { display: flex; flex-direction: column; }
    .contain > * { flex: 1}
    .gap { margin-top: 1rem !important }
    footer {display: none !important}
    .citation-box {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin-top: 2rem;
        font-family: monospace;
    }
    .citation-box pre {
        margin: 0;
        white-space: pre-wrap;
    }
    .main-panel { min-height: 600px }
    .output-panel { min-height: 400px }
    .checkbox-group { max-height: 200px; overflow-y: auto }
    .model-select { max-height: 150px }
    .download-btn { margin-top: 1rem !important }
    """
) as demo:
    # Header with description
    gr.Markdown("""
    # 📚 Yourbench: Dynamic Question Generation Tool

    Generate high-quality questions and answers from your documents using state-of-the-art language models.
    This tool helps create diverse question types with varying complexity levels, perfect for educational
    assessment and content understanding.
    """)

    with gr.Row():
        # Left column for configuration
        with gr.Column(scale=2, elem_classes="main-panel"):
            # Document Upload Section
            with gr.Group():
                gr.Markdown("### 📄 Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )

            # Core Parameters Section
            with gr.Group():
                gr.Markdown("### ⚙️ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )

                difficulty_level = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=0.1,
                    label="Average Difficulty",
                    info="1: Easy, 5: Very Hard",
                    elem_id="difficulty"
                )

            with gr.Row():
                # Question Types Section
                with gr.Column():
                    gr.Markdown("### 🎯 Question Types")
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical", "Application Based", "Conceptual",
                            "Counterfactual", "Factual", "Open Ended",
                            "True False", "False Premise", "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )

                # Complexity and Models Section
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🔄 Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )

                    with gr.Group():
                        gr.Markdown("### 🤖 Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )

            process_btn = gr.Button(
                "🚀 Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )

        # Right column for outputs
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### 📊 Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )

    # Instructions Section
    with gr.Accordion("📝 Instructions", open=False):
        gr.Markdown("""
        1. **Upload Documents**: Support for .txt and .pdf files
        2. **Configure Parameters**:
           - Set chunk size for document processing
           - Choose number of questions to generate
           - Adjust difficulty level (1: Easy to 5: Very Hard)
        3. **Select Question Types**: Choose from various question categories
        4. **Set Complexity**: Single-shot or multi-hop reasoning
        5. **Choose Models**: Select AI models for ensemble generation
        6. Click '🚀 Generate Questions' to start
        7. Download results as CSV for further use
        """)

    # Citation Section
    gr.Markdown("""
    ### 📚 Citation
    If you find this work helpful in your research or applications, please cite:
    """)

    with gr.Group(elem_classes="citation-box"):
        gr.Markdown("""```bibtex
        @misc{yourbench2024,
            title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
            author={Your Team},
            year={2024},
            publisher={GitHub},
            journal={GitHub repository},
            howpublished={\\url{https://github.com/yourbench/yourbench}},
        }
        ```""")

    # API Information
    gr.Markdown("""
    ### 🔌 API Usage

    This tool can be used programmatically through its API. Here's how to interact with it:

    ```python
    import gradio_client

    client = gradio_client.Client("YOUR_SPACE_URL")

    result = client.predict(
        ["document.pdf"],  # Input files
        500,  # Chunk size
        5,  # Number of questions
        ["Analytical", "Factual"],  # Question types
        ["Single Shot"],  # Complexity types
        3.0,  # Difficulty level
        ["GPT-4", "Claude 3.5 Sonnet"],  # Models
        api_name="/predict"
    )
    ```

    Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
    as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
    """)

    # Event handler. The endpoint is named explicitly so that it matches the
    # `api_name="/predict"` shown in the API usage notes above; without it the
    # endpoint name would depend on Gradio's default naming.
    process_btn.click(
        process_files,
        inputs=[
            input_files, chunk_size, num_questions,
            question_types_dict, complexity_types_dict,
            difficulty_level, model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output],
        api_name="predict"
    )
353
+
354
if __name__ == "__main__":
    # Launch the app only when this file is run as a script, not on import.
    # NOTE(review): share=True requests a public Gradio share link in addition
    # to the local server — confirm this is intended for the deployment target.
    demo.launch(share=True)
356
+