# Spaces: Running
import streamlit as st | |
import requests | |
import json | |
import fitz # PyMuPDF | |
from fpdf import FPDF | |
import os | |
import tempfile | |
import base64 | |
import dotenv | |
from dotenv import load_dotenv | |
load_dotenv() | |
# Previous functions from Question Generator | |
def get_pdf_path(pdf_source=None, uploaded_file=None):
    """Write a PDF to a fresh temporary directory and return its path.

    An uploaded file takes precedence over a URL when both are supplied.

    Args:
        pdf_source: URL of a PDF to download, or None/empty.
        uploaded_file: Upload object exposing ``.name`` and ``.getvalue()``
            (returning bytes), or None.

    Returns:
        Path of the saved PDF, or None when no source was given or saving
        failed. Failures are reported through ``st.error``.
    """
    temp_dir = None
    try:
        # If a file is uploaded locally
        if uploaded_file is not None:
            temp_dir = tempfile.mkdtemp()
            # Keep only the final path component so a crafted name such as
            # "../../x.pdf" cannot be written outside the temp directory.
            safe_name = os.path.basename(str(uploaded_file.name).replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                safe_name = "uploaded.pdf"
            pdf_path = os.path.join(temp_dir, safe_name)
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(uploaded_file.getvalue())
            return pdf_path

        # If a URL is provided
        if pdf_source:
            response = requests.get(pdf_source, timeout=30)
            response.raise_for_status()
            temp_dir = tempfile.mkdtemp()
            pdf_path = os.path.join(temp_dir, "downloaded.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(response.content)
            return pdf_path

        # If no source is provided
        st.error("No PDF source provided.")
        return None
    except Exception as e:
        # Do not leave a half-written temp directory behind on failure.
        if temp_dir is not None:
            import shutil

            shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(f"Error getting PDF: {e}")
        return None
def extract_text_pymupdf(pdf_path):
    """Return the text of every page of a PDF joined into one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined by a single space, or "" when the document
        cannot be opened or read (the error is reported via ``st.error``).
    """
    try:
        # The context manager closes the document even if a page fails to
        # extract, which a trailing doc.close() would skip.
        with fitz.open(pdf_path) as doc:
            return " ".join(page.get_text() for page in doc)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Ask the Gemini API for questions and return them as a list of lines.

    Args:
        api_key: Gemini API key.
        assistant_context: Text the questions should be based on.
        user_query: The request placed in the prompt's Query section.
        role_description: Role text placed in the prompt.
        response_instructions: Formatting instructions placed in the prompt.
        bloom_taxonomy_weights: Mapping with the keys Knowledge,
            Comprehension, Application, Analysis, Synthesis and Evaluation.
        num_questions: Number of questions requested in the prompt.

    Returns:
        The non-empty, stripped lines of the model's text reply. An empty
        list is returned on any failure, which is reported via ``st.error``.
    """
    try:
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
        prompt = f"""
You are a highly knowledgeable assistant. Your task is to assist the user with the following context from an academic paper.
**Role**: {role_description}
**Context**: {assistant_context}
**Instructions**: {response_instructions}
**Bloom's Taxonomy Weights**:
Knowledge: {bloom_taxonomy_weights['Knowledge']}%
Comprehension: {bloom_taxonomy_weights['Comprehension']}%
Application: {bloom_taxonomy_weights['Application']}%
Analysis: {bloom_taxonomy_weights['Analysis']}%
Synthesis: {bloom_taxonomy_weights['Synthesis']}%
Evaluation: {bloom_taxonomy_weights['Evaluation']}%
**Query**: {user_query}
**Number of Questions**: {num_questions}
"""
        payload = {"contents": [{"parts": [{"text": prompt}]}]}
        # The key travels in a header rather than the query string, so it
        # does not appear in the URL that requests embeds in error messages
        # (which are shown to the user below).
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": api_key,
        }
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()

        # Tolerate missing or empty "candidates"/"parts" instead of raising
        # IndexError, and join every text part of the first candidate.
        candidates = result.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or []
        questions = "".join(part.get("text", "") for part in parts)

        return [line.strip() for line in questions.split("\n") if line.strip()]
    except requests.RequestException as e:
        st.error(f"API request error: {e}")
        return []
    except Exception as e:
        st.error(f"Error generating questions: {e}")
        return []
def normalize_bloom_weights(bloom_weights):
    """Scale the weights so that they sum to (approximately) 100.

    Args:
        bloom_weights: Mapping of category name to numeric weight.

    Returns:
        The same mapping when it already sums to 100 or is empty; otherwise
        a new dict with each weight rescaled and rounded to two decimals.
        When every weight is zero there is no ratio to preserve, so the 100
        points are split evenly instead of dividing by zero.
    """
    total = sum(bloom_weights.values())
    if total == 100 or not bloom_weights:
        return bloom_weights

    if total == 0:
        even_share = round(100 / len(bloom_weights), 2)
        return {key: even_share for key in bloom_weights}

    normalization_factor = 100 / total
    return {
        key: round(value * normalization_factor, 2)
        for key, value in bloom_weights.items()
    }
def generate_pdf(questions, filename="questions.pdf"):
    """Write the questions to a PDF file, one numbered entry per question.

    Args:
        questions: Iterable of question strings.
        filename: Path of the PDF to create.

    Returns:
        ``filename`` on success, or None on failure (reported via
        ``st.error``).
    """
    try:
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        # Title, followed by some space before the questions
        pdf.cell(200, 10, txt="Generated Questions", ln=True, align="C")
        pdf.ln(10)

        for i, question in enumerate(questions, 1):
            # The built-in Arial font only covers Latin-1. Replace anything
            # outside it (curly quotes, dashes, ...) with "?" so that one
            # such character does not make the whole PDF fail.
            text = f"Q{i}: {question}".encode("latin-1", "replace").decode("latin-1")
            # multi_cell wraps text that is too long for one line
            pdf.multi_cell(0, 10, text)

        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"Error generating PDF: {e}")
        return None
def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Fetch a PDF, extract its text and generate questions from it.

    Args:
        pdf_source: URL of the PDF, or None.
        uploaded_file: Uploaded PDF object, or None.
        api_key: Gemini API key passed to ``generate_ai_response``.
        role_description: Role text for the prompt.
        response_instructions: Formatting instructions for the prompt.
        bloom_taxonomy_weights: Mapping of Bloom category to weight; it is
            normalised before being sent.
        num_questions: Number of questions to request.

    Returns:
        The list returned by ``generate_ai_response``, or an empty list
        when the PDF could not be obtained, yielded no text, or an error
        occurred.
    """
    try:
        # Get PDF path (either from URL or uploaded file)
        pdf_path = get_pdf_path(pdf_source, uploaded_file)
        if not pdf_path:
            return []

        try:
            pdf_text = extract_text_pymupdf(pdf_path)
            if not pdf_text:
                return []

            user_query = "Generate questions based on the above context."
            normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
            return generate_ai_response(
                api_key,
                pdf_text,
                user_query,
                role_description,
                response_instructions,
                normalized_bloom_weights,
                num_questions,
            )
        finally:
            # Runs on every exit path, including the empty-text return and
            # exceptions, so the temporary file and directory never linger.
            try:
                os.remove(pdf_path)
                os.rmdir(os.path.dirname(pdf_path))
            except OSError as e:
                st.warning(f"Could not delete temporary PDF file: {e}")
    except Exception as e:
        st.error(f"Error processing PDF and generating questions: {e}")
        return []
# Placeholder scoring data: one entry per question, each holding a 0-10
# score for every Bloom's Taxonomy category.
dummydata = [
    {
        "question": "What is the main idea of the paper?",
        "score": {"Knowledge": 10, "Comprehension": 9, "Application": 8,
                  "Analysis": 7, "Synthesis": 6, "Evaluation": 5},
    },
    {
        "question": "What are the key findings of the paper?",
        "score": {"Knowledge": 9, "Comprehension": 8, "Application": 7,
                  "Analysis": 6, "Synthesis": 5, "Evaluation": 4},
    },
    {
        "question": "How does the paper contribute to the field?",
        "score": {"Knowledge": 8, "Comprehension": 7, "Application": 6,
                  "Analysis": 5, "Synthesis": 4, "Evaluation": 3},
    },
    {
        "question": "What are the limitations of the paper?",
        "score": {"Knowledge": 7, "Comprehension": 6, "Application": 5,
                  "Analysis": 4, "Synthesis": 3, "Evaluation": 2},
    },
    {
        "question": "What are the future research directions?",
        "score": {"Knowledge": 6, "Comprehension": 5, "Application": 4,
                  "Analysis": 3, "Synthesis": 2, "Evaluation": 1},
    },
    {
        "question": "How does the paper compare to existing work?",
        "score": {"Knowledge": 5, "Comprehension": 4, "Application": 3,
                  "Analysis": 2, "Synthesis": 1, "Evaluation": 0},
    },
]
def _init_session_state():
    """Seed ``st.session_state`` with defaults for keys that are not set yet."""
    defaults = {
        'totalscore': None,
        'show_details': False,
        'pdf_source_type': "URL",
        'pdf_url': "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf",
        'uploaded_file': None,
        'questions': [],
        'accepted_questions': [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_question_generator_tab():
    """Render the question generator: input form, question review, download."""
    st.title("π Academic Paper Question Generator")
    st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

    # The API key is read from the environment (populated by load_dotenv).
    api_key = os.getenv('GEMINI_API_KEY')

    # Main form for PDF and question generation
    with st.form(key='pdf_generation_form'):
        st.header("PDF Source Configuration")
        st.session_state.pdf_url = st.text_input(
            "Enter the URL of the PDF",
            key="pdf_url_input"
        )
        st.markdown("<h3 style='text-align: center;'>OR</h3>", unsafe_allow_html=True)
        st.session_state.uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            key="pdf_file_upload"
        )

        # Bloom's Taxonomy Weights
        st.subheader("Adjust Bloom's Taxonomy Weights")
        col1, col2, col3 = st.columns(3)
        with col1:
            knowledge = st.slider("Knowledge: Remembering information", 0, 100, 20, key='knowledge_slider')
            application = st.slider("Application: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')
        with col2:
            comprehension = st.slider("Comprehension: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
            analysis = st.slider("Analysis: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')
        with col3:
            synthesis = st.slider("Synthesis: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
            evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

        bloom_taxonomy_weights = {
            "Knowledge": knowledge,
            "Comprehension": comprehension,
            "Application": application,
            "Analysis": analysis,
            "Synthesis": synthesis,
            "Evaluation": evaluation
        }

        num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')
        submit_button = st.form_submit_button(label='Generate Questions')

    # Process form submission
    if submit_button:
        if not api_key:
            st.error("Please enter a valid Gemini API key.")
        elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
            st.error("Please enter a PDF URL or upload a PDF file.")
        elif sum(bloom_taxonomy_weights.values()) == 0:
            # All sliders at zero give no ratio to normalise and would
            # otherwise lead to a division by zero.
            st.error("Please set at least one Bloom's Taxonomy weight above zero.")
        else:
            normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
            st.info("Normalized Bloom's Taxonomy Weights:")
            st.json(normalized_bloom_weights)

            # Role and instructions for the AI
            role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
            response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

            with st.spinner('Generating questions...'):
                st.session_state.questions = process_pdf_and_generate_questions(
                    pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
                    uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
                    api_key=api_key,
                    role_description=role_description,
                    response_instructions=response_instructions,
                    bloom_taxonomy_weights=normalized_bloom_weights,
                    num_questions=num_questions
                )

            # A new batch replaces the old one: forget selections made for
            # the previous questions so they neither stay in the accepted
            # list nor pre-select "Accept" for the new question at the same
            # position (the radio keys are reused per index).
            st.session_state.accepted_questions = []
            stale_keys = [k for k in st.session_state.keys() if str(k).startswith("radio_")]
            for stale_key in stale_keys:
                del st.session_state[stale_key]

    if st.session_state.questions:
        st.header("Generated Questions")

        # A form keeps each radio change from triggering a rerun
        with st.form(key='questions_form'):
            for idx, question in enumerate(st.session_state.questions, 1):
                cols = st.columns([4, 1])
                with cols[0]:
                    st.write(f"Q{idx}: {question}")
                with cols[1]:
                    # Defaults to 'Discard'; the user opts in with 'Accept'
                    selected_option = st.radio(f"Select an option for Q{idx}", ["Accept", "Discard"], key=f"radio_{idx}", index=1)

                accepted = st.session_state.accepted_questions
                if selected_option == "Accept":
                    if question not in accepted:
                        accepted.append(question)
                elif question in accepted:
                    accepted.remove(question)

            st.form_submit_button("Update Accepted Questions")

        # Show accepted questions
        if st.session_state.accepted_questions:
            st.header("Accepted Questions")
            for q in st.session_state.accepted_questions:
                st.write(q)

            if st.button("Download Accepted Questions as PDF"):
                # Build the PDF in a private temporary directory and serve
                # its bytes, so concurrent sessions never share or leave
                # behind a file in the working directory.
                pdf_bytes = None
                with tempfile.TemporaryDirectory() as tmp_dir:
                    pdf_path = generate_pdf(
                        st.session_state.accepted_questions,
                        filename=os.path.join(tmp_dir, "accepted_questions.pdf"),
                    )
                    if pdf_path:
                        with open(pdf_path, "rb") as pdf_file:
                            pdf_bytes = pdf_file.read()

                if pdf_bytes is not None:
                    st.download_button(
                        label="Click to Download PDF",
                        data=pdf_bytes,
                        file_name="accepted_questions.pdf",
                        mime="application/pdf"
                    )
                    st.success("PDF generated successfully!")
        else:
            st.info("No questions selected yet.")

    # Footer
    st.markdown("---")
    st.markdown("""
### About this Tool
- Generate academic paper questions using Bloom's Taxonomy
- Customize question generation weights
- Select and refine generated questions
- Support for PDF via URL or local upload
""")


def _render_paper_scorer_tab():
    """Render the paper scorer: upload form and the score breakdown."""
    st.title("π Academic Paper Scorer")
    st.markdown("### Evaluate the Quality of Your Academic Paper")

    # CSS classes used by the score display below
    st.markdown("""
<style>
.upload-container {
background-color: #f0f2f6;
border-radius: 10px;
padding: 20px;
border: 2px dashed #4a6cf7;
text-align: center;
}
.score-breakdown {
background-color: #f8f9fa;
border-radius: 8px;
padding: 15px;
margin-bottom: 15px;
}
.score-header {
font-weight: bold;
color: #4a6cf7;
margin-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)

    with st.form(key='paper_scorer_form'):
        st.header("Upload Your Academic Paper")
        # NOTE(review): the uploaded file is not read below; the scores come
        # from the module-level placeholder data.
        st.file_uploader(
            "Choose a PDF file",
            type=['pdf', 'jpg', 'png', 'jpeg'],
            label_visibility="collapsed"
        )
        submit_button = st.form_submit_button(
            "Score Paper",
            use_container_width=True,
            type="primary"
        )

    if not submit_button:
        return

    # Each category is scored out of 10; the overall score is the achieved
    # share of the maximum, expressed out of 100.
    max_category_score = 10
    total_score = sum(sum(item['score'].values()) for item in dummydata)
    max_total = sum(len(item['score']) for item in dummydata) * max_category_score
    average_score = total_score / max_total * 100

    col1, _ = st.columns([2, 1])
    with col1:
        st.metric(label="Total Paper Score", value=f"{average_score:.2f}/100")

    categories = ['Knowledge', 'Comprehension', 'Application', 'Analysis', 'Synthesis', 'Evaluation']
    with st.expander("Show Detailed Scores", expanded=True):
        for idx, item in enumerate(dummydata, 1):
            st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)

            score_cols = st.columns(len(categories))
            for col, category in zip(score_cols, categories):
                with col:
                    score = item['score'][category]
                    # green above 7, orange above 4, red otherwise
                    color = 'green' if score > 7 else 'orange' if score > 4 else 'red'
                    st.markdown(f"""
<div style="text-align: center;
background-color: #f1f1f1;
border-radius: 5px;
padding: 5px;
margin-bottom: 5px;">
<div style="font-weight: bold; color: {color};">{category}</div>
<div style="font-size: 18px; color: {color};">{score}/10</div>
</div>
""", unsafe_allow_html=True)

            # NOTE(review): this closing tag has no matching opening <div>
            # in this function; kept as-is to leave the rendered page alone.
            st.markdown('</div>', unsafe_allow_html=True)

            # Separator between questions
            if idx < len(dummydata):
                st.markdown('---')


def main():
    """Configure the page and render the two tabs of the app."""
    st.set_page_config(page_title="Academic Paper Tool", page_icon="π", layout="wide")

    # Tabs for different functionalities
    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])
    _init_session_state()

    with tab1:
        _render_question_generator_tab()
    with tab2:
        _render_paper_scorer_tab()
# Entry point: render the app only when this file is executed as a script.
if __name__ == "__main__":
    main()