feat: push
Browse files
app.py
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from datetime import datetime
|
4 |
+
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
+
import time
|
7 |
+
import random
|
8 |
+
import uuid
|
9 |
+
|
10 |
+
def save_uploaded_files(files, session_id):
    """Copy uploaded files into the telemetry directory for this session.

    Args:
        files: Iterable of uploaded file objects (each exposing a ``.name``
            path attribute, as Gradio's File component provides), or None.
        session_id: Unique identifier used as the per-session subdirectory
            under ``telemetry_files/``.

    Returns:
        List of destination paths for the files that were actually saved
        (empty when *files* is None or contains only None entries).
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    # Guard against a missing upload list (Gradio passes None when nothing
    # was uploaded) as well as None entries within the list.
    for file in files or []:
        if file is not None:
            filename = os.path.basename(file.name)
            save_path = os.path.join(save_dir, filename)
            # copy2 preserves file metadata (timestamps) along with contents.
            shutil.copy2(file.name, save_path)
            saved_paths.append(save_path)

    return saved_paths
|
24 |
+
|
25 |
+
def mock_process_documents(files, chunk_size, num_questions, question_types, complexity_types,
                           difficulty, selected_models, *, delay=5.0):
    """Simulate document processing and return a DataFrame of mock Q&A rows.

    Args:
        files: Uploaded file objects to persist for telemetry.
        chunk_size: Token chunk size (unused by the mock; kept for API parity
            with the eventual real pipeline).
        num_questions: Number of question rows to generate.
        question_types: List of selected question-type labels.
        complexity_types: List of selected complexity labels.
        difficulty: Average difficulty value echoed into the generated text.
        selected_models: List of selected model names.
        delay: Seconds to sleep to simulate processing. Keyword-only; defaults
            to the original 5-second pause, settable to 0 for fast tests.

    Returns:
        pandas.DataFrame with columns question_type, complexity, question,
        answer, model, difficulty.
    """
    time.sleep(delay)  # Simulate processing latency.

    # Persist the uploads under a unique, timestamped session directory.
    session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    saved_files = save_uploaded_files(files, session_id)

    data = []
    for i in range(num_questions):
        # question_types etc. are lists of selected values; sample one of each
        # per row so the mock output covers the chosen categories.
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)

        question = f"[{complexity}] Sample {question_type} question {i+1} (Difficulty: {difficulty:.1f}, Model: {model})"
        answer = f"This is a sample answer for question {i+1}. Files processed: {', '.join(saved_files)}"
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": question,
            "answer": answer,
            "model": model,
            "difficulty": difficulty
        })

    return pd.DataFrame(data)
|
53 |
+
|
54 |
+
def generate_csv_file(df, session_id):
    """Write *df* to ``telemetry_files/<session_id>/results.csv``.

    Args:
        df: Results DataFrame to persist.
        session_id: Per-session subdirectory name under ``telemetry_files``.

    Returns:
        The path of the written CSV file, or None when *df* has no rows.
    """
    if df.empty:
        return None

    # Make sure the per-session directory exists before writing.
    target_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(target_dir, exist_ok=True)

    destination = os.path.join(target_dir, "results.csv")
    df.to_csv(destination, index=False)
    return destination
|
67 |
+
|
68 |
+
def process_files(
    input_files, chunk_size, num_questions,
    question_types_dict, complexity_types_dict,
    difficulty_level, model_selection_dict
):
    """Validate inputs, run the mock generation, and package outputs for the UI.

    Args:
        input_files: Uploaded file objects from the File component (or None).
        chunk_size: Token chunk size slider value.
        num_questions: Number of questions slider value.
        question_types_dict: Selected question-type labels (list, despite the
            historical ``_dict`` suffix — CheckboxGroup yields a list).
        complexity_types_dict: Selected complexity labels (list).
        difficulty_level: Average difficulty slider value.
        model_selection_dict: Selected model names (list).

    Returns:
        Tuple of (results DataFrame, status message, CSV path or None),
        matching the Gradio outputs [output_table, output_status, csv_output].
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    # CheckboxGroup components already deliver lists of selected values, so
    # they can be validated and forwarded directly — no conversion needed.
    if not question_types_dict or not complexity_types_dict or not model_selection_dict:
        return pd.DataFrame(), "Error: Please select at least one option from each category", None

    start_time = time.time()
    results_df = mock_process_documents(
        input_files, chunk_size, num_questions,
        question_types_dict, complexity_types_dict,
        difficulty_level, model_selection_dict
    )
    processing_time = time.time() - start_time

    # Persist the results CSV under a fresh session id for the download widget.
    session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    csv_path = generate_csv_file(results_df, session_id)

    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path  # already None when results_df was empty
    )
|
102 |
+
|
103 |
+
# Create custom theme
# Light-touch customization of Gradio's Base theme: blue/indigo palette with
# slate neutrals, the Inter font, small corner radii, and explicit light/dark
# fills for the page body, inputs, and primary buttons.
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
).set(
    # "*token" strings reference theme palette variables, not literal colors;
    # the *_dark variants apply only in dark mode.
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    block_label_text_weight="600",
    block_title_text_weight="600",
    input_background_fill="white",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_200",
    input_border_color_dark="*neutral_700",
)
|
124 |
+
|
125 |
+
# Create the Gradio interface
# Layout: a two-column row (configuration on the left, results on the right),
# followed by collapsible instructions, a citation block, API usage notes, and
# the single click handler that wires the form to process_files.
# NOTE(review): the emoji in the markdown headings below appear mojibake-
# garbled in this copy ("π" etc.) — presumably they were emoji originally;
# confirm against the deployed app before editing these strings.
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
    .gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
    .contain { display: flex; flex-direction: column; }
    .contain > * { flex: 1}
    .gap { margin-top: 1rem !important }
    footer {display: none !important}
    .citation-box {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin-top: 2rem;
        font-family: monospace;
    }
    .citation-box pre {
        margin: 0;
        white-space: pre-wrap;
    }
    .main-panel { min-height: 600px }
    .output-panel { min-height: 400px }
    .checkbox-group { max-height: 200px; overflow-y: auto }
    .model-select { max-height: 150px }
    .download-btn { margin-top: 1rem !important }
    """
) as demo:
    # Header with description
    gr.Markdown("""
    # π Yourbench: Dynamic Question Generation Tool

    Generate high-quality questions and answers from your documents using state-of-the-art language models.
    This tool helps create diverse question types with varying complexity levels, perfect for educational
    assessment and content understanding.
    """)

    with gr.Row():
        # Left column for configuration
        with gr.Column(scale=2, elem_classes="main-panel"):
            # Document Upload Section
            with gr.Group():
                gr.Markdown("### π Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )

            # Core Parameters Section
            with gr.Group():
                gr.Markdown("### βοΈ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )

                # Fractional step so users can express an "average" difficulty.
                difficulty_level = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=0.1,
                    label="Average Difficulty",
                    info="1: Easy, 5: Very Hard",
                    elem_id="difficulty"
                )

            with gr.Row():
                # Question Types Section
                with gr.Column():
                    gr.Markdown("### π― Question Types")
                    # CheckboxGroup yields a LIST of the selected choice
                    # strings (the "_dict" suffix is historical).
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical", "Application Based", "Conceptual",
                            "Counterfactual", "Factual", "Open Ended",
                            "True False", "False Premise", "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )

                # Complexity and Models Section
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### π Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )

                    with gr.Group():
                        gr.Markdown("### π€ Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )

            process_btn = gr.Button(
                "π Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )

        # Right column for outputs
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### π Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                # Headers match the columns produced by process_files.
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                # Non-interactive: populated programmatically with the CSV path.
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )

    # Instructions Section
    with gr.Accordion("π Instructions", open=False):
        gr.Markdown("""
        1. **Upload Documents**: Support for .txt and .pdf files
        2. **Configure Parameters**:
           - Set chunk size for document processing
           - Choose number of questions to generate
           - Adjust difficulty level (1: Easy to 5: Very Hard)
        3. **Select Question Types**: Choose from various question categories
        4. **Set Complexity**: Single-shot or multi-hop reasoning
        5. **Choose Models**: Select AI models for ensemble generation
        6. Click 'π Generate Questions' to start
        7. Download results as CSV for further use
        """)

    # Citation Section
    gr.Markdown("""
    ### π Citation
    If you find this work helpful in your research or applications, please cite:
    """)

    with gr.Group(elem_classes="citation-box"):
        gr.Markdown("""```bibtex
    @misc{yourbench2024,
        title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
        author={Your Team},
        year={2024},
        publisher={GitHub},
        journal={GitHub repository},
        howpublished={\\url{https://github.com/yourbench/yourbench}},
    }
    ```""")

    # API Information
    gr.Markdown("""
    ### π API Usage

    This tool can be used programmatically through its API. Here's how to interact with it:

    ```python
    import gradio_client

    client = gradio_client.Client("YOUR_SPACE_URL")

    result = client.predict(
        ["document.pdf"],  # Input files
        500,  # Chunk size
        5,  # Number of questions
        ["Analytical", "Factual"],  # Question types
        ["Single Shot"],  # Complexity types
        3.0,  # Difficulty level
        ["GPT-4", "Claude 3.5 Sonnet"],  # Models
        api_name="/predict"
    )
    ```

    Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
    as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
    """)

    # Event handler
    # Input order must match process_files' parameter order; outputs map to
    # (DataFrame, status string, CSV path) respectively.
    process_btn.click(
        process_files,
        inputs=[
            input_files, chunk_size, num_questions,
            question_types_dict, complexity_types_dict,
            difficulty_level, model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output]
    )
|
353 |
+
|
354 |
+
if __name__ == "__main__":
    # NOTE(review): share=True opens a public Gradio tunnel link every launch —
    # presumably intended for demo sharing; confirm this is desired before
    # deploying anywhere a public URL would be a concern.
    demo.launch(share=True)
|
356 |
+
|