"""Streamlit app: upload research-paper PDFs, index them with FAISS, and
answer questions via a Groq-hosted LLM (simple RAG pipeline)."""

import os

import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import matplotlib.pyplot as plt
from groq import Groq

# SECURITY: never commit API keys in source. Read the key from the
# environment instead (set GROQ_API_KEY before launching the app).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)

# Initialize embedding model (open-source, 384-dim sentence embeddings).
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS index. Dimension must match the embedding model above.
dimension = 384
index = faiss.IndexFlatL2(dimension)


# Helper Functions
def extract_text_from_pdfs(files):
    """Extract and concatenate text from multiple PDF file-like objects.

    Pages with no extractable text yield None in PyPDF2; treat those as
    empty strings instead of raising TypeError on concatenation.
    """
    parts = []
    for file in files:
        reader = PdfReader(file)
        for page in reader.pages:
            # extract_text() may return None for image-only pages.
            parts.append(page.extract_text() or "")
    return "".join(parts)


def create_chunks(text, chunk_size=500):
    """Split *text* into word chunks of at most *chunk_size* words each."""
    words = text.split()
    return [' '.join(words[i:i + chunk_size])
            for i in range(0, len(words), chunk_size)]


def generate_embeddings(chunks):
    """Return a (len(chunks), 384) numpy array of sentence embeddings."""
    return embedding_model.encode(chunks, convert_to_numpy=True)


def query_groq(prompt):
    """Send *prompt* to the Groq chat model and return the reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content


def visualize_comparative_insights(methods, results):
    """Render a scatter plot of methods vs. results in the Streamlit page."""
    # Use an explicit Figure and close it afterwards; passing the plt
    # module to st.pyplot leaks figures across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(methods, results, c='blue', alpha=0.7)
    ax.set_title("Methods vs. Results")
    ax.set_xlabel("Methods")
    ax.set_ylabel("Results")
    st.pyplot(fig)
    plt.close(fig)


# Streamlit App
st.title("Research Paper Analyzer")
st.write("Upload research papers, ask questions, and gain AI-driven insights!")

# Step 1: PDF Upload
uploaded_files = st.file_uploader(
    "Upload your PDF files", type=["pdf"], accept_multiple_files=True
)

if uploaded_files:
    with st.spinner("Processing the PDFs..."):
        # Extract text
        pdf_text = extract_text_from_pdfs(uploaded_files)
        st.success("PDFs processed successfully!")

        # Step 2: Chunking
        chunks = create_chunks(pdf_text)
        st.info(f"Documents split into {len(chunks)} chunks.")

        # Step 3: Embedding Creation.
        # Streamlit re-executes this script on every interaction; reset the
        # index first so vectors are not duplicated on each rerun.
        embeddings = generate_embeddings(chunks)
        index.reset()
        index.add(np.array(embeddings))
        st.success("Embeddings stored in FAISS database.")

    # Step 4: Query
    user_query = st.text_input("Ask a question:")
    if user_query:
        with st.spinner("Searching and generating a response..."):
            # Embed user query
            query_embedding = embedding_model.encode(
                [user_query], convert_to_numpy=True
            )

            # Search in FAISS. Clamp k to the number of stored vectors and
            # drop the -1 ids FAISS returns when fewer than k exist.
            k = min(5, index.ntotal)
            distances, indices = index.search(np.array(query_embedding), k=k)
            relevant_chunks = [chunks[i] for i in indices[0] if i >= 0]

            # Combine retrieved chunks as context
            context = " ".join(relevant_chunks)

            # Query Groq model
            prompt = f"Context: {context}\n\nQuestion: {user_query}\n\nAnswer:"
            answer = query_groq(prompt)

            # Display response
            st.write("### Answer:")
            st.write(answer)

    # Step 5: Comparative Insights
    if st.button("Generate Comparative Insights"):
        # Example data for visualization
        methods = [1, 2, 3, 4, 5]  # Replace with actual methods data
        results = [3.2, 4.1, 5.6, 4.8, 6.0]  # Replace with actual results data
        visualize_comparative_insights(methods, results)

    # Step 6: Bibliography Suggestions
    if st.button("Suggest Related Papers"):
        related_papers = ["Paper A", "Paper B", "Paper C"]  # Replace with actual suggestions
        st.write("### Suggested Papers:")
        for paper in related_papers:
            st.write(f"- {paper}")