Spaces:

AruniAnkur
/

BloomScore

Sleeping

App Files Files Community

BloomScore / app.py

AruniAnkur

added a large thing

8c84d02 verified 7 months ago

raw

history blame

20.7 kB

	import streamlit as st
	import requests
	import json
	import fitz # PyMuPDF
	from fpdf import FPDF
	import os
	import tempfile
	import base64
	import dotenv
	from dotenv import load_dotenv

	# Load variables from a .env file (if one is found) into the process
	# environment at import time; main() later reads GEMINI_API_KEY via os.getenv.
	load_dotenv()

	# Previous functions from Question Generator
	def get_pdf_path(pdf_source=None, uploaded_file=None):
	    """Save a PDF into a fresh temporary directory and return its path.

	    An uploaded file takes precedence over a URL when both are supplied.

	    Args:
	        pdf_source: URL of a PDF to download, or None/empty.
	        uploaded_file: Object exposing ``name`` and ``getvalue()`` (the
	            upload widget's return value in this app), or None.

	    Returns:
	        Path of the written PDF, or None when no source was supplied or any
	        step failed (the failure is reported through ``st.error``).
	    """
	    import shutil  # local import: only needed to clean up after a failure

	    temp_dir = None
	    try:
	        # If a file is uploaded locally
	        if uploaded_file is not None:
	            temp_dir = tempfile.mkdtemp()
	            # The file name comes from the client: keep only its final
	            # component so it can never point outside temp_dir.
	            safe_name = os.path.basename(uploaded_file.name or "")
	            if safe_name in ("", ".", ".."):
	                safe_name = "uploaded.pdf"
	            pdf_path = os.path.join(temp_dir, safe_name)

	            with open(pdf_path, "wb") as pdf_file:
	                pdf_file.write(uploaded_file.getvalue())
	            return pdf_path

	        # If a URL is provided
	        if pdf_source:
	            response = requests.get(pdf_source, timeout=30)
	            response.raise_for_status()

	            temp_dir = tempfile.mkdtemp()
	            pdf_path = os.path.join(temp_dir, "downloaded.pdf")

	            with open(pdf_path, "wb") as pdf_file:
	                pdf_file.write(response.content)
	            return pdf_path

	        # If no source is provided
	        st.error("No PDF source provided.")
	        return None
	    except Exception as e:
	        # Do not leave a half-written temporary directory behind.
	        if temp_dir is not None:
	            shutil.rmtree(temp_dir, ignore_errors=True)
	        st.error(f"Error getting PDF: {e}")
	        return None

	def extract_text_pymupdf(pdf_path):
	    """Return the text of every page of a PDF joined by single spaces.

	    Args:
	        pdf_path: Path of the PDF file to open with PyMuPDF.

	    Returns:
	        One string containing all pages' text, or "" when the file cannot
	        be opened or read (the failure is reported through ``st.error``).
	    """
	    try:
	        doc = fitz.open(pdf_path)
	        try:
	            # Join all pages into one large context string
	            return " ".join(page.get_text() for page in doc)
	        finally:
	            # Release the file handle even if a page fails to extract.
	            doc.close()
	    except Exception as e:
	        st.error(f"Error extracting text from PDF: {e}")
	        return ""

	def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
	    """Ask the Gemini API for questions and return them as a list of lines.

	    Args:
	        api_key: Gemini API key.
	        assistant_context: Text the questions should be based on.
	        user_query: The request appended to the prompt.
	        role_description: Role text inserted into the prompt.
	        response_instructions: Formatting instructions inserted into the prompt.
	        bloom_taxonomy_weights: Mapping with the keys Knowledge, Comprehension,
	            Application, Analysis, Synthesis and Evaluation (percentages).
	        num_questions: Number of questions requested in the prompt.

	    Returns:
	        The non-empty, stripped lines of the model's text reply; [] when the
	        request fails or the reply holds no text (failures are reported
	        through ``st.error``).
	    """
	    try:
	        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"

	        prompt = f"""
	You are a highly knowledgeable assistant. Your task is to assist the user with the following context from an academic paper.

	Role: {role_description}

	Context: {assistant_context}

	Instructions: {response_instructions}

	Bloom's Taxonomy Weights:
	Knowledge: {bloom_taxonomy_weights['Knowledge']}%
	Comprehension: {bloom_taxonomy_weights['Comprehension']}%
	Application: {bloom_taxonomy_weights['Application']}%
	Analysis: {bloom_taxonomy_weights['Analysis']}%
	Synthesis: {bloom_taxonomy_weights['Synthesis']}%
	Evaluation: {bloom_taxonomy_weights['Evaluation']}%

	Query: {user_query}

	Number of Questions: {num_questions}
	"""

	        payload = {"contents": [{"parts": [{"text": prompt}]}]}
	        # The key travels in a header rather than the query string so that a
	        # request error (whose message includes the URL) shown via st.error
	        # cannot expose it.
	        headers = {
	            "Content-Type": "application/json",
	            "x-goog-api-key": api_key,
	        }

	        response = requests.post(url, headers=headers, json=payload, timeout=60)
	        response.raise_for_status()

	        result = response.json()
	        # A reply may carry an empty candidates/parts list (e.g. a blocked
	        # prompt); fall back to an empty placeholder instead of indexing [0]
	        # into an empty list.
	        candidates = result.get("candidates") or [{}]
	        parts = candidates[0].get("content", {}).get("parts") or [{}]
	        questions = parts[0].get("text", "")
	        return [line.strip() for line in questions.split("\n") if line.strip()]
	    except requests.RequestException as e:
	        st.error(f"API request error: {e}")
	        return []
	    except Exception as e:
	        st.error(f"Error generating questions: {e}")
	        return []

	def normalize_bloom_weights(bloom_weights):
	    """Scale the weights so that they sum to (approximately) 100.

	    Args:
	        bloom_weights: Mapping of category name to numeric weight.

	    Returns:
	        The mapping itself when it is empty or already sums to 100;
	        otherwise a new dict whose values are rescaled and rounded to two
	        decimals. When every weight is 0 the total is split evenly, because
	        there is no ratio to preserve and dividing by the total would raise
	        ZeroDivisionError.
	    """
	    if not bloom_weights:
	        return bloom_weights

	    total = sum(bloom_weights.values())
	    if total == 100:
	        return bloom_weights

	    if total == 0:
	        even_share = round(100 / len(bloom_weights), 2)
	        return {key: even_share for key in bloom_weights}

	    # Normalize each weight by multiplying it by the normalization factor
	    normalization_factor = 100 / total
	    return {
	        key: round(value * normalization_factor, 2)
	        for key, value in bloom_weights.items()
	    }

	def generate_pdf(questions, filename="questions.pdf"):
	    """Write the questions to a simple one-column PDF.

	    Args:
	        questions: Iterable of question strings; each is written as
	            ``Q<n>: <question>`` with automatic line wrapping.
	        filename: Output path of the PDF.

	    Returns:
	        ``filename`` on success, or None when PDF creation failed (the
	        failure is reported through ``st.error``).
	    """
	    try:
	        pdf = FPDF()
	        pdf.set_auto_page_break(auto=True, margin=15)
	        pdf.add_page()

	        # Set font
	        pdf.set_font("Arial", size=12)

	        # Add a title or heading
	        pdf.cell(200, 10, txt="Generated Questions", ln=True, align="C")

	        # Add space between title and questions
	        pdf.ln(10)

	        # Loop through questions and add them to the PDF
	        for i, question in enumerate(questions, 1):
	            # The built-in Arial font only covers Latin-1. Model output often
	            # contains characters outside it (curly quotes, dashes, ...), which
	            # would abort the whole export, so those are replaced with "?".
	            line = f"Q{i}: {question}".encode("latin-1", "replace").decode("latin-1")
	            # Using multi_cell for wrapping the text in case it's too long
	            pdf.multi_cell(0, 10, line)

	        # Save the generated PDF to the file
	        pdf.output(filename)
	        return filename
	    except Exception as e:
	        st.error(f"Error generating PDF: {e}")
	        return None

	def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
	    """Fetch a PDF, extract its text and generate questions from it.

	    Args:
	        pdf_source: URL of the PDF, or None.
	        uploaded_file: Uploaded PDF object, or None.
	        api_key: Gemini API key passed on to ``generate_ai_response``.
	        role_description: Role text for the prompt.
	        response_instructions: Formatting instructions for the prompt.
	        bloom_taxonomy_weights: Mapping of Bloom category to weight; it is
	            normalized before being placed in the prompt.
	        num_questions: Number of questions to request.

	    Returns:
	        List of generated question strings, or [] when the PDF could not be
	        obtained, contained no text, or any step failed.
	    """
	    pdf_path = None
	    try:
	        # Get PDF path (either from URL or uploaded file)
	        pdf_path = get_pdf_path(pdf_source, uploaded_file)
	        if not pdf_path:
	            return []

	        # Extract text
	        pdf_text = extract_text_pymupdf(pdf_path)
	        if not pdf_text:
	            return []

	        # Generate questions
	        user_query = "Generate questions based on the above context."
	        normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
	        return generate_ai_response(
	            api_key,
	            pdf_text,
	            user_query,
	            role_description,
	            response_instructions,
	            normalized_bloom_weights,
	            num_questions,
	        )
	    except Exception as e:
	        st.error(f"Error processing PDF and generating questions: {e}")
	        return []
	    finally:
	        # Clean up the temporary PDF and its directory on every exit path,
	        # including the early returns and failures above.
	        if pdf_path:
	            try:
	                os.remove(pdf_path)
	                os.rmdir(os.path.dirname(pdf_path))
	            except OSError as e:
	                st.warning(f"Could not delete temporary PDF file: {e}")

	# Hard-coded sample results for the "Paper Scorer" tab: one entry per
	# question, each scored 0-10 in the six Bloom's Taxonomy categories.
	dummydata = [
	    {
	        "question": "What is the main idea of the paper?",
	        "score": {"Knowledge": 10, "Comprehension": 9, "Application": 8,
	                  "Analysis": 7, "Synthesis": 6, "Evaluation": 5},
	    },
	    {
	        "question": "What are the key findings of the paper?",
	        "score": {"Knowledge": 9, "Comprehension": 8, "Application": 7,
	                  "Analysis": 6, "Synthesis": 5, "Evaluation": 4},
	    },
	    {
	        "question": "How does the paper contribute to the field?",
	        "score": {"Knowledge": 8, "Comprehension": 7, "Application": 6,
	                  "Analysis": 5, "Synthesis": 4, "Evaluation": 3},
	    },
	    {
	        "question": "What are the limitations of the paper?",
	        "score": {"Knowledge": 7, "Comprehension": 6, "Application": 5,
	                  "Analysis": 4, "Synthesis": 3, "Evaluation": 2},
	    },
	    {
	        "question": "What are the future research directions?",
	        "score": {"Knowledge": 6, "Comprehension": 5, "Application": 4,
	                  "Analysis": 3, "Synthesis": 2, "Evaluation": 1},
	    },
	    {
	        "question": "How does the paper compare to existing work?",
	        "score": {"Knowledge": 5, "Comprehension": 4, "Application": 3,
	                  "Analysis": 2, "Synthesis": 1, "Evaluation": 0},
	    },
	]

	def main():
	# Streamlit entry point: configures the page and renders two tabs,
	# "Question Generator" (PDF -> Gemini-generated questions -> accept/export)
	# and "Paper Scorer" (score display driven by the module-level dummydata).
	st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")

	# Tabs for different functionalities
	tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

	# Session keys initialised here are not read anywhere else in this function.
	if 'totalscore' not in st.session_state:
	st.session_state.totalscore = None
	if 'show_details' not in st.session_state:
	st.session_state.show_details = False


	# Question Generator Tab
	with tab1:
	st.title("🎓 Academic Paper Question Generator")
	st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

	# Initialize session state variables with defaults
	if 'pdf_source_type' not in st.session_state:
	st.session_state.pdf_source_type = "URL"
	if 'pdf_url' not in st.session_state:
	st.session_state.pdf_url = "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"
	if 'uploaded_file' not in st.session_state:
	st.session_state.uploaded_file = None
	if 'questions' not in st.session_state:
	st.session_state.questions = []
	if 'accepted_questions' not in st.session_state:
	st.session_state.accepted_questions = []

	# API Configuration
	# The key is taken only from the GEMINI_API_KEY environment variable; the
	# sidebar input below is commented out, so users cannot type one in.
	api_key = os.getenv('GEMINI_API_KEY')
	# api_key = st.sidebar.text_input("Enter Gemini API Key", type="password", value=apivalue)

	# Main form for PDF and question generation
	with st.form(key='pdf_generation_form'):
	st.header("PDF Source Configuration")

	# NOTE(review): no value= is passed, so the default URL stored in
	# st.session_state.pdf_url above looks like it is overwritten by the
	# widget's own (initially empty) value on the first run — confirm intent.
	st.session_state.pdf_url = st.text_input(
	"Enter the URL of the PDF",
	key="pdf_url_input"
	)

	st.markdown("<h3 style='text-align: center;'>OR</h3>", unsafe_allow_html=True)

	st.session_state.uploaded_file = st.file_uploader(
	"Upload a PDF file",
	type=['pdf'],
	key="pdf_file_upload"
	)

	# Bloom's Taxonomy Weights
	# Six sliders (0-100) laid out in three columns; the defaults add up to 100.
	st.subheader("Adjust Bloom's Taxonomy Weights")
	col1, col2, col3 = st.columns(3)

	with col1:
	knowledge = st.slider("Knowledge: Remembering information", 0, 100, 20, key='knowledge_slider')
	application = st.slider("Application: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')

	with col2:
	comprehension = st.slider("Comprehension: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
	analysis = st.slider("Analysis: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')

	with col3:
	synthesis = st.slider("Synthesis: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
	evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

	# Collect the Bloom's Taxonomy weights
	bloom_taxonomy_weights = {
	"Knowledge": knowledge,
	"Comprehension": comprehension,
	"Application": application,
	"Analysis": analysis,
	"Synthesis": synthesis,
	"Evaluation": evaluation
	}

	# Number of questions
	num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')

	# Submit button within the form
	submit_button = st.form_submit_button(label='Generate Questions')

	# Process form submission
	if submit_button:
	# Validate API key
	if not api_key:
	st.error("Please enter a valid Gemini API key.")
	# Validate PDF source
	elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
	st.error("Please enter a PDF URL or upload a PDF file.")
	else:
	# Normalize the Bloom's weights
	normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)

	st.info("Normalized Bloom's Taxonomy Weights:")
	st.json(normalized_bloom_weights)

	# Role and instructions for the AI
	role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
	response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

	# Generate questions
	# The result replaces any previously generated questions in session state;
	# st.session_state.accepted_questions is not reset here.
	with st.spinner('Generating questions...'):
	st.session_state.questions = process_pdf_and_generate_questions(
	pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
	uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
	api_key=api_key,
	role_description=role_description,
	response_instructions=response_instructions,
	bloom_taxonomy_weights=normalized_bloom_weights,
	num_questions=num_questions
	)
	if st.session_state.questions:
	st.header("Generated Questions")

	# Create a form for question management to prevent reload
	with st.form(key='questions_form'):
	for idx, question in enumerate(st.session_state.questions, 1):
	cols = st.columns([4, 1]) # Create two columns for radio buttons (Accept, Discard)

	with cols[0]:
	st.write(f"Q{idx}: {question}")

	# Use radio buttons for selection
	with cols[1]:
	# Default value is 'Discard', so users can change it to 'Accept'
	selected_option = st.radio(f"Select an option for Q{idx}", ["Accept", "Discard"], key=f"radio_{idx}", index=1)

	# Handle radio button state changes
	# The accepted list is keyed by question text, so duplicate question
	# strings are stored (and removed) only once.
	if selected_option == "Accept":
	# Add to accepted questions if 'Accept' is selected
	if question not in st.session_state.accepted_questions:
	st.session_state.accepted_questions.append(question)
	else:
	# Remove from accepted questions if 'Discard' is selected
	if question in st.session_state.accepted_questions:
	st.session_state.accepted_questions.remove(question)

	# Submit button for question selection
	# The return value is not used below; submitting only triggers a rerun.
	submit_questions = st.form_submit_button("Update Accepted Questions")


	# Show accepted questions
	if st.session_state.accepted_questions:
	st.header("Accepted Questions")
	for q in st.session_state.accepted_questions:
	st.write(q)

	# Download button for accepted questions
	# NOTE(review): the download button appears to be created only in the run
	# where the outer st.button returned True — verify it survives the next rerun.
	if st.button("Download Accepted Questions as PDF"):
	filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
	if filename:
	with open(filename, "rb") as pdf_file:
	st.download_button(
	label="Click to Download PDF",
	data=pdf_file,
	file_name="accepted_questions.pdf",
	mime="application/pdf"
	)
	st.success("PDF generated successfully!")
	else:
	st.info("No questions selected yet.")

	# Add some footer information
	st.markdown("---")
	st.markdown("""
	### About this Tool
	- Generate academic paper questions using Bloom's Taxonomy
	- Customize question generation weights
	- Select and refine generated questions
	- Support for PDF via URL or local upload
	""")
	# Paper Scorer Tab
	with tab2:
	st.title("📄 Academic Paper Scorer")

	# Add a descriptive subheader
	st.markdown("### Evaluate the Quality of Your Academic Paper")

	# Create a styled container for the upload section
	# Injects CSS classes; of these, only "score-header" is referenced by the
	# markup emitted further down in this function.
	st.markdown("""
	<style>
	.upload-container {
	background-color: #f0f2f6;
	border-radius: 10px;
	padding: 20px;
	border: 2px dashed #4a6cf7;
	text-align: center;
	}
	.score-breakdown {
	background-color: #f8f9fa;
	border-radius: 8px;
	padding: 15px;
	margin-bottom: 15px;
	}
	.score-header {
	font-weight: bold;
	color: #4a6cf7;
	margin-bottom: 10px;
	}
	</style>
	""", unsafe_allow_html=True)

	with st.form(key='paper_scorer_form'):
	st.header("Upload Your Academic Paper")
	# The label says PDF, but image types (jpg/png/jpeg) are accepted as well.
	uploaded_file = st.file_uploader(
	"Choose a PDF file",
	type=['pdf','jpg','png','jpeg'],
	label_visibility="collapsed"
	)

	# Custom submit button with some styling
	submit_button = st.form_submit_button(
	"Score Paper",
	use_container_width=True,
	type="primary"
	)

	# NOTE(review): uploaded_file is never read below; the displayed score is
	# computed entirely from the hard-coded dummydata list.
	if submit_button:
	# Calculate total score
	total_score = sum(
	sum(question['score'].values())
	for question in dummydata
	)
	# Percentage of the maximum attainable total
	# (6 categories x 10 points for each question).
	average_score = total_score / (len(dummydata) * 6 * 10) * 100

	# Score display columns
	col1, col2 = st.columns([2,1])

	with col1:
	st.metric(label="Total Paper Score", value=f"{average_score:.2f}/100")

	with st.expander("Show Detailed Scores", expanded=True):
	for idx, item in enumerate(dummydata, 1):

	# Question header
	st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)

	# Create columns for score display
	score_cols = st.columns(6)

	# Scoring categories
	categories = ['Knowledge', 'Comprehension', 'Application', 'Analysis', 'Synthesis', 'Evaluation']

	for col, category in zip(score_cols, categories):
	with col:
	# Determine color based on score
	# green for scores above 7, orange for 5-7, red for 4 and below
	score = item['score'][category]
	color = 'green' if score > 7 else 'orange' if score > 4 else 'red'

	st.markdown(f"""
	<div style="text-align: center;
	background-color: #f1f1f1;
	border-radius: 5px;
	padding: 5px;
	margin-bottom: 5px;">
	<div style="font-weight: bold; color: {color};">{category}</div>
	<div style="font-size: 18px; color: {color};">{score}/10</div>
	</div>
	""", unsafe_allow_html=True)

	# NOTE(review): this emits a closing </div> with no matching opening tag
	# in the markup above — looks like a leftover; confirm before removing.
	st.markdown('</div>', unsafe_allow_html=True)

	# Add a separator between questions
	if idx < len(dummydata):
	st.markdown('---')
	# but = st.button("Show Detailed Scores")
	# if but:
	# st.write("Detailed Scores")
	# with st.container():
	# for key, value in dummydata.items():
	# st.write(f"{key}: {value}")

	# Run the Streamlit app: build the UI only when this file is executed as
	# the main script, not when it is imported as a module.
	if __name__ == "__main__":
	main()