# Research Paper Analysis Tool — Streamlit app that extracts text from
# uploaded PDF papers and queries the Perplexity API to summarize each
# paper into structured categories, downloadable as a CSV.
import streamlit as st
import pandas as pd
import PyPDF2
import io
import os
from dotenv import load_dotenv
import requests
import time
# Load environment variables from a local .env file (if present) so the
# API key does not need to be hard-coded or exported manually.
load_dotenv()
# API key read from the environment; None if unset — requests will then
# fail with an auth error surfaced via st.error in call_perplexity_api.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
# Chat-completions endpoint for the Perplexity API.
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
def call_perplexity_api(prompt: str) -> str:
    """Send a single-turn chat request to the Perplexity API.

    Args:
        prompt: The user message to send to the model.

    Returns:
        The assistant's reply text, or an empty string on any failure
        (the error is shown in the Streamlit UI via st.error).
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }
    try:
        # requests has no default timeout; without one a stalled API call
        # would hang the Streamlit app indefinitely.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Best-effort: report the failure in the UI and return "" so the
        # caller can continue with the remaining categories/files.
        st.error(f"API Error: {str(e)}")
        return ""
def extract_text_from_pdf(pdf_file):
    """Extract text content from a PDF file.

    Args:
        pdf_file: A binary file-like object containing PDF data
            (e.g. a Streamlit UploadedFile).

    Returns:
        All pages' extracted text concatenated, one newline after each page.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() may return None for pages with no extractable text
        # (e.g. scanned images); guard against TypeError on concatenation.
        text += (page.extract_text() or "") + "\n"
    return text
def analyze_paper(text: str, category: str, max_chars: int = 5000) -> str:
    """Build a category-specific prompt and get the model's analysis.

    Args:
        text: Full extracted text of the paper.
        category: One of the supported analysis categories (keys below).
        max_chars: Maximum number of characters of paper text to include
            in the prompt, to stay within model token limits. Defaults to
            5000 (the previous hard-coded limit).

    Returns:
        The model's response text for the category, or "" on API failure.

    Raises:
        KeyError: If `category` is not one of the supported categories.
    """
    prompts = {
        "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
        "Results": "What are the main results and findings from this research paper:",
        "Summarized Introduction": "Summarize the introduction section of this research paper:",
        "Methods Used": "What are the main methods and methodologies used in this research:",
        "Literature Survey": "Summarize the literature review or related work from this paper:",
        "Limitations": "What are the limitations mentioned in this research:",
        "Contributions": "What are the main contributions of this research:",
        "Practical Implications": "What are the practical implications of this research:",
        "Objectives": "What are the main objectives of this research:",
        "Findings": "What are the key findings from this research:",
        "Future Research": "What future research directions are suggested in this paper:",
        "Dependent Variables": "What are the dependent variables studied in this research:",
        "Independent Variables": "What are the independent variables studied in this research:",
        "Dataset": "What dataset(s) were used in this research:",
        "Problem Statement": "What is the main problem statement or research question:",
        "Challenges": "What challenges were faced or addressed in this research:",
        "Applications": "What are the potential applications of this research:"
    }
    # Truncate the paper text so the request stays within token limits.
    prompt = f"{prompts[category]}\n\nPaper text: {text[:max_chars]}"
    return call_perplexity_api(prompt)
def main():
    """Streamlit entry point: upload PDFs, analyze each one per category,
    show the results table, and offer a CSV download."""
    st.title("Research Paper Analysis Tool")

    uploaded_files = st.file_uploader(
        "Upload PDF files", type="pdf", accept_multiple_files=True
    )
    # Guard clauses: nothing to do until files are uploaded and the
    # button is pressed (st.button still renders the button itself).
    if not uploaded_files:
        return
    if not st.button("Process Papers"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()

    # Analysis categories; each becomes one column in the output table.
    categories = [
        "Summarized Abstract", "Results", "Summarized Introduction",
        "Methods Used", "Literature Survey", "Limitations",
        "Contributions", "Practical Implications", "Objectives",
        "Findings", "Future Research", "Dependent Variables",
        "Independent Variables", "Dataset", "Problem Statement",
        "Challenges", "Applications",
    ]
    total_steps = len(uploaded_files) * len(categories)

    results = []
    for file_index, pdf in enumerate(uploaded_files):
        status_text.text(f"Processing {pdf.name}...")
        paper_text = extract_text_from_pdf(pdf)
        row = {"Filename": pdf.name}
        for cat_index, category in enumerate(categories):
            status_text.text(f"Processing {pdf.name} - {category}")
            row[category] = analyze_paper(paper_text, category)
            completed = file_index * len(categories) + cat_index + 1
            progress_bar.progress(completed / total_steps)
            # Throttle requests to stay under API rate limits.
            time.sleep(1)
        results.append(row)

    df = pd.DataFrame(results)
    st.download_button(
        label="Download Results as CSV",
        data=df.to_csv(index=False),
        file_name="research_papers_analysis.csv",
        mime="text/csv",
    )
    st.subheader("Analysis Results")
    st.dataframe(df)
    status_text.text("Processing complete!")
    progress_bar.progress(1.0)
# Run the app only when executed directly (not when imported).
if __name__ == "__main__":
    main()