import streamlit as st import pandas as pd import PyPDF2 import io import os from dotenv import load_dotenv import requests import time # Load environment variables load_dotenv() PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions" def call_perplexity_api(prompt: str) -> str: """Call Perplexity AI with a prompt, return the text response if successful.""" headers = { "Authorization": f"Bearer {PERPLEXITY_API_KEY}", "Content-Type": "application/json", } payload = { "model": "llama-3.1-sonar-small-128k-chat", "messages": [{"role": "user", "content": prompt}], "temperature": 0.3, } try: response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload) response.raise_for_status() return response.json()["choices"][0]["message"]["content"] except Exception as e: st.error(f"API Error: {str(e)}") return "" def extract_text_from_pdf(pdf_file): """Extract text content from a PDF file.""" pdf_reader = PyPDF2.PdfReader(pdf_file) text = "" for page in pdf_reader.pages: text += page.extract_text() + "\n" return text def analyze_paper(text: str, category: str) -> str: """Generate a prompt and get analysis for a specific category.""" prompts = { "Summarized Abstract": "Extract and summarize the abstract from this research paper:", "Results": "What are the main results and findings from this research paper:", "Summarized Introduction": "Summarize the introduction section of this research paper:", "Methods Used": "What are the main methods and methodologies used in this research:", "Literature Survey": "Summarize the literature review or related work from this paper:", "Limitations": "What are the limitations mentioned in this research:", "Contributions": "What are the main contributions of this research:", "Practical Implications": "What are the practical implications of this research:", "Objectives": "What are the main objectives of this research:", "Findings": "What are the key findings from this research:", "Future Research": "What future research directions are suggested in this paper:", "Dependent Variables": "What are the dependent variables studied in this research:", "Independent Variables": "What are the independent variables studied in this research:", "Dataset": "What dataset(s) were used in this research:", "Problem Statement": "What is the main problem statement or research question:", "Challenges": "What challenges were faced or addressed in this research:", "Applications": "What are the potential applications of this research:" } prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits return call_perplexity_api(prompt) def main(): st.title("Research Paper Analysis Tool") # File uploader uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True) if uploaded_files: if st.button("Process Papers"): # Initialize progress bar progress_bar = st.progress(0) status_text = st.empty() # Initialize results dictionary results = [] # Define categories categories = [ "Summarized Abstract", "Results", "Summarized Introduction", "Methods Used", "Literature Survey", "Limitations", "Contributions", "Practical Implications", "Objectives", "Findings", "Future Research", "Dependent Variables", "Independent Variables", "Dataset", "Problem Statement", "Challenges", "Applications" ] # Process each file for i, file in enumerate(uploaded_files): status_text.text(f"Processing {file.name}...") # Extract text from PDF text = extract_text_from_pdf(file) # Initialize paper results paper_results = {"Filename": file.name} # Analyze each category for j, category in enumerate(categories): status_text.text(f"Processing {file.name} - {category}") paper_results[category] = analyze_paper(text, category) # Update progress progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories)) progress_bar.progress(progress) # Add small delay to avoid API rate limits time.sleep(1) results.append(paper_results) # Create DataFrame df = pd.DataFrame(results) # Convert DataFrame to CSV csv = df.to_csv(index=False) # Create download button st.download_button( label="Download Results as CSV", data=csv, file_name="research_papers_analysis.csv", mime="text/csv" ) # Display results in the app st.subheader("Analysis Results") st.dataframe(df) status_text.text("Processing complete!") progress_bar.progress(1.0) if __name__ == "__main__": main()