import streamlit as st import requests import json import fitz # PyMuPDF from fpdf import FPDF import os import tempfile import base64 import dotenv from dotenv import load_dotenv load_dotenv() # Previous functions from Question Generator def get_pdf_path(pdf_source=None, uploaded_file=None): try: # If a file is uploaded locally if uploaded_file is not None: # Create a temporary file to save the uploaded PDF temp_dir = tempfile.mkdtemp() pdf_path = os.path.join(temp_dir, uploaded_file.name) # Save the uploaded file with open(pdf_path, "wb") as pdf_file: pdf_file.write(uploaded_file.getvalue()) return pdf_path # If a URL is provided if pdf_source: response = requests.get(pdf_source, timeout=30) response.raise_for_status() # Create a temporary file temp_dir = tempfile.mkdtemp() pdf_path = os.path.join(temp_dir, "downloaded.pdf") with open(pdf_path, "wb") as pdf_file: pdf_file.write(response.content) return pdf_path # If no source is provided st.error("No PDF source provided.") return None except Exception as e: st.error(f"Error getting PDF: {e}") return None def extract_text_pymupdf(pdf_path): try: doc = fitz.open(pdf_path) pages_content = [] for page_num in range(len(doc)): page = doc[page_num] pages_content.append(page.get_text()) doc.close() return " ".join(pages_content) # Join all pages into one large context string except Exception as e: st.error(f"Error extracting text from PDF: {e}") return "" def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions): try: url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={api_key}" prompt = f""" You are a highly knowledgeable assistant. Your task is to assist the user with the following context from an academic paper. **Role**: {role_description} **Context**: {assistant_context} **Instructions**: {response_instructions} **Bloom's Taxonomy Weights**: Knowledge: {bloom_taxonomy_weights['Knowledge']}% Comprehension: {bloom_taxonomy_weights['Comprehension']}% Application: {bloom_taxonomy_weights['Application']}% Analysis: {bloom_taxonomy_weights['Analysis']}% Synthesis: {bloom_taxonomy_weights['Synthesis']}% Evaluation: {bloom_taxonomy_weights['Evaluation']}% **Query**: {user_query} **Number of Questions**: {num_questions} """ payload = { "contents": [ { "parts": [ {"text": prompt} ] } ] } headers = {"Content-Type": "application/json"} response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60) response.raise_for_status() result = response.json() questions = result.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "") questions_list = [question.strip() for question in questions.split("\n") if question.strip()] return questions_list except requests.RequestException as e: st.error(f"API request error: {e}") return [] except Exception as e: st.error(f"Error generating questions: {e}") return [] def normalize_bloom_weights(bloom_weights): total = sum(bloom_weights.values()) if total != 100: normalization_factor = 100 / total # Normalize each weight by multiplying it by the normalization factor bloom_weights = {key: round(value * normalization_factor, 2) for key, value in bloom_weights.items()} return bloom_weights def generate_pdf(questions, filename="questions.pdf"): try: pdf = FPDF() pdf.set_auto_page_break(auto=True, margin=15) pdf.add_page() # Set font pdf.set_font("Arial", size=12) # Add a title or heading pdf.cell(200, 10, txt="Generated Questions", ln=True, align="C") # Add space between title and questions pdf.ln(10) # Loop through questions and add them to the PDF for i, question in enumerate(questions, 1): # Using multi_cell for wrapping the text in case it's too long pdf.multi_cell(0, 10, f"Q{i}: {question}") # Save the generated PDF to the file pdf.output(filename) return filename except Exception as e: st.error(f"Error generating PDF: {e}") return None def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions): try: # Get PDF path (either from URL or uploaded file) pdf_path = get_pdf_path(pdf_source, uploaded_file) if not pdf_path: return [] # Extract text pdf_text = extract_text_pymupdf(pdf_path) if not pdf_text: return [] # Generate questions assistant_context = pdf_text user_query = "Generate questions based on the above context." normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights) questions = generate_ai_response( api_key, assistant_context, user_query, role_description, response_instructions, normalized_bloom_weights, num_questions ) # Clean up temporary PDF file try: os.remove(pdf_path) # Remove the temporary directory os.rmdir(os.path.dirname(pdf_path)) except Exception as e: st.warning(f"Could not delete temporary PDF file: {e}") return questions except Exception as e: st.error(f"Error processing PDF and generating questions: {e}") return [] dummydata = [ {"question": "What is the main idea of the paper?", "score": { "Knowledge": 10, "Comprehension": 9, "Application": 8, "Analysis": 7, "Synthesis": 6, "Evaluation": 5 }}, {"question": "What are the key findings of the paper?", "score": { "Knowledge": 9, "Comprehension": 8, "Application": 7, "Analysis": 6, "Synthesis": 5, "Evaluation": 4 }}, {"question": "How does the paper contribute to the field?", "score": { "Knowledge": 8, "Comprehension": 7, "Application": 6, "Analysis": 5, "Synthesis": 4, "Evaluation": 3 }}, {"question": "What are the limitations of the paper?", "score": { "Knowledge": 7, "Comprehension": 6, "Application": 5, "Analysis": 4, "Synthesis": 3, "Evaluation": 2 }}, {"question": "What are the future research directions?", "score": { "Knowledge": 6, "Comprehension": 5, "Application": 4, "Analysis": 3, "Synthesis": 2, "Evaluation": 1 }}, {"question": "How does the paper compare to existing work?", "score": { "Knowledge": 5, "Comprehension": 4, "Application": 3, "Analysis": 2, "Synthesis": 1, "Evaluation": 0 } } ] def main(): st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide") # Tabs for different functionalities tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"]) if 'totalscore' not in st.session_state: st.session_state.totalscore = None if 'show_details' not in st.session_state: st.session_state.show_details = False # Question Generator Tab with tab1: st.title("🎓 Academic Paper Question Generator") st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy") # Initialize session state variables with defaults if 'pdf_source_type' not in st.session_state: st.session_state.pdf_source_type = "URL" if 'pdf_url' not in st.session_state: st.session_state.pdf_url = "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf" if 'uploaded_file' not in st.session_state: st.session_state.uploaded_file = None if 'questions' not in st.session_state: st.session_state.questions = [] if 'accepted_questions' not in st.session_state: st.session_state.accepted_questions = [] # API Configuration api_key = os.getenv('GEMINI_API_KEY') # api_key = st.sidebar.text_input("Enter Gemini API Key", type="password", value=apivalue) # Main form for PDF and question generation with st.form(key='pdf_generation_form'): st.header("PDF Source Configuration") st.session_state.pdf_url = st.text_input( "Enter the URL of the PDF", key="pdf_url_input" ) st.markdown("