# NovaScholar / extract.py

import streamlit as st
import pandas as pd
import PyPDF2
import io
import os
from dotenv import load_dotenv
import requests
import time
# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
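
# load_dotenv() reads a local .env file, so PERPLEXITY_API_KEY can be supplied
# either there or directly in the environment.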

def call_perplexity_api(prompt: str) -> str:
    """Call Perplexity AI with a prompt, return the text response if successful."""
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }
    try:
        # Time out rather than hanging indefinitely if the API is unresponsive
        response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return ""

def extract_text_from_pdf(pdf_file):
    """Extract text content from a PDF file."""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # Guard against pages with no extractable text
        text += (page.extract_text() or "") + "\n"
    return text

def analyze_paper(text: str, category: str) -> str:
    """Generate a prompt and get analysis for a specific category."""
    prompts = {
        "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
        "Results": "What are the main results and findings from this research paper:",
        "Summarized Introduction": "Summarize the introduction section of this research paper:",
        "Methods Used": "What are the main methods and methodologies used in this research:",
        "Literature Survey": "Summarize the literature review or related work from this paper:",
        "Limitations": "What are the limitations mentioned in this research:",
        "Contributions": "What are the main contributions of this research:",
        "Practical Implications": "What are the practical implications of this research:",
        "Objectives": "What are the main objectives of this research:",
        "Findings": "What are the key findings from this research:",
        "Future Research": "What future research directions are suggested in this paper:",
        "Dependent Variables": "What are the dependent variables studied in this research:",
        "Independent Variables": "What are the independent variables studied in this research:",
        "Dataset": "What dataset(s) were used in this research:",
        "Problem Statement": "What is the main problem statement or research question:",
        "Challenges": "What challenges were faced or addressed in this research:",
        "Applications": "What are the potential applications of this research:"
    }
    prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}"  # Limit text to avoid token limits
    return call_perplexity_api(prompt)
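
# Example usage (illustrative only; the filename and category are placeholders):
#
#     with open("paper.pdf", "rb") as f:
#         text = extract_text_from_pdf(f)
#     summary = analyze_paper(text, "Summarized Abstract")
#
# analyze_paper returns the raw string from the API, or "" if the call failed.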

def main():
    st.title("Research Paper Analysis Tool")

    # File uploader
    uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

    if uploaded_files:
        if st.button("Process Papers"):
            # Initialize progress bar
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Initialize results list (one dict per paper)
            results = []

            # Define categories
            categories = [
                "Summarized Abstract", "Results", "Summarized Introduction",
                "Methods Used", "Literature Survey", "Limitations",
                "Contributions", "Practical Implications", "Objectives",
                "Findings", "Future Research", "Dependent Variables",
                "Independent Variables", "Dataset", "Problem Statement",
                "Challenges", "Applications"
            ]

            # Process each file
            for i, file in enumerate(uploaded_files):
                status_text.text(f"Processing {file.name}...")

                # Extract text from PDF
                text = extract_text_from_pdf(file)

                # Initialize results for this paper
                paper_results = {"Filename": file.name}

                # Analyze each category
                for j, category in enumerate(categories):
                    status_text.text(f"Processing {file.name} - {category}")
                    paper_results[category] = analyze_paper(text, category)

                    # Update progress
                    progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
                    progress_bar.progress(progress)

                    # Add a small delay to avoid API rate limits
                    time.sleep(1)

                results.append(paper_results)

            # Create DataFrame
            df = pd.DataFrame(results)

            # Convert DataFrame to CSV
            csv = df.to_csv(index=False)

            # Create download button
            st.download_button(
                label="Download Results as CSV",
                data=csv,
                file_name="research_papers_analysis.csv",
                mime="text/csv"
            )

            # Display results in the app
            st.subheader("Analysis Results")
            st.dataframe(df)

            status_text.text("Processing complete!")
            progress_bar.progress(1.0)
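
# To run the app locally (assuming Streamlit is installed and this file is saved
# as extract.py): `streamlit run extract.py`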

if __name__ == "__main__":
    main()