|
import os |
|
import streamlit as st |
|
from PyPDF2 import PdfReader |
|
from sentence_transformers import SentenceTransformer |
|
import faiss |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from groq import Groq |
|
|
|
# Read the Groq API key from the environment instead of hard-coding it:
# the previous literal key was a committed (leaked) credential. Export
# GROQ_API_KEY before launching the app.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Client used for all chat-completion calls below.
client = Groq(api_key=GROQ_API_KEY)
|
|
|
|
|
# Sentence-embedding model used for both document chunks and user queries.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Derive the embedding dimension from the model itself rather than
# hard-coding 384, so swapping the model cannot silently desynchronise
# the FAISS index. (all-MiniLM-L6-v2 does produce 384-dim vectors.)
dimension = embedding_model.get_sentence_embedding_dimension()

# Exact (flat) L2 nearest-neighbour index; rebuilt on every Streamlit rerun.
index = faiss.IndexFlatL2(dimension)
|
|
|
|
|
def extract_text_from_pdfs(files):
    """Extract and concatenate the text of every page in multiple PDF files.

    Args:
        files: Iterable of file-like objects (e.g. Streamlit uploads)
            that PyPDF2's ``PdfReader`` can open.

    Returns:
        str: Concatenated text of all pages. Pages without an extractable
        text layer (e.g. scanned images) are skipped.
    """
    pages = []
    for file in files:
        reader = PdfReader(file)
        for page in reader.pages:
            # extract_text() may return None for pages with no text layer;
            # the original `all_text += ...` crashed with TypeError there.
            text = page.extract_text()
            if text:
                pages.append(text)
    # join() avoids the quadratic cost of repeated string concatenation.
    return "".join(pages)
|
|
|
def create_chunks(text, chunk_size=500):
    """Break *text* into chunks of at most *chunk_size* whitespace-separated words."""
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), chunk_size):
        chunks.append(' '.join(tokens[start:start + chunk_size]))
    return chunks
|
|
|
def generate_embeddings(chunks):
    """Encode a list of text chunks into a NumPy array of sentence embeddings."""
    vectors = embedding_model.encode(chunks, convert_to_numpy=True)
    return vectors
|
|
|
def query_groq(prompt):
    """Send *prompt* to the Groq chat model and return the reply text."""
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    return completion.choices[0].message.content
|
|
|
def visualize_comparative_insights(methods, results):
    """Render a scatter plot of *methods* vs. *results* in the Streamlit app.

    Args:
        methods: Sequence of x-axis values.
        results: Sequence of y-axis values, same length as *methods*.
    """
    # Use an explicit Figure: passing the global pyplot module to
    # st.pyplot() is deprecated and unsafe when multiple sessions share
    # matplotlib's global state.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(methods, results, c='blue', alpha=0.7)
    ax.set_title("Methods vs. Results")
    ax.set_xlabel("Methods")
    ax.set_ylabel("Results")
    st.pyplot(fig)
    # Close the figure so repeated Streamlit reruns don't leak figures.
    plt.close(fig)
|
|
|
|
|
# --- Streamlit page header ---
st.title("Research Paper Analyzer")

st.write("Upload research papers, ask questions, and gain AI-driven insights!")
|
|
|
|
|
# --- PDF upload, chunking, and FAISS indexing --------------------------------
uploaded_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)

if uploaded_files:

    with st.spinner("Processing the PDFs..."):
        pdf_text = extract_text_from_pdfs(uploaded_files)

    if not pdf_text.strip():
        # Scanned / image-only PDFs yield no text; stop before chunking so
        # the rest of the pipeline never runs on an empty corpus.
        st.error("No extractable text found in the uploaded PDFs.")
        st.stop()

    st.success("PDFs processed successfully!")

    # Split the corpus into fixed-size word chunks for retrieval.
    chunks = create_chunks(pdf_text)
    st.info(f"Documents split into {len(chunks)} chunks.")

    # Embed every chunk and add the vectors to the FAISS index.
    embeddings = generate_embeddings(chunks)
    index.add(np.array(embeddings))
    st.success("Embeddings stored in FAISS database.")
|
|
|
|
|
# --- Question answering over the indexed chunks ------------------------------
user_query = st.text_input("Ask a question:")

if user_query:
    if index.ntotal == 0:
        # Nothing indexed in this run (no PDFs processed yet). Bail out
        # early: the original code raised NameError on the undefined
        # `chunks` list when a question was asked before any upload.
        st.warning("Please upload and process PDFs before asking a question.")
    else:
        with st.spinner("Searching and generating a response..."):
            query_embedding = embedding_model.encode([user_query], convert_to_numpy=True)

            # Never request more neighbours than there are vectors: FAISS
            # pads missing results with index -1, which Python's negative
            # indexing would silently map to chunks[-1].
            top_k = min(5, index.ntotal)
            distances, indices = index.search(np.array(query_embedding), k=top_k)
            relevant_chunks = [chunks[i] for i in indices[0] if i >= 0]

            # Concatenate the retrieved chunks as grounding context.
            context = " ".join(relevant_chunks)

            prompt = f"Context: {context}\n\nQuestion: {user_query}\n\nAnswer:"
            answer = query_groq(prompt)

        st.write("### Answer:")
        st.write(answer)
|
|
|
|
|
# Plot placeholder comparative data on demand.
if st.button("Generate Comparative Insights"):
    demo_methods = [1, 2, 3, 4, 5]
    demo_results = [3.2, 4.1, 5.6, 4.8, 6.0]
    visualize_comparative_insights(demo_methods, demo_results)
|
|
|
|
|
# Show a static list of suggested papers when requested.
if st.button("Suggest Related Papers"):
    suggestions = ["Paper A", "Paper B", "Paper C"]
    st.write("### Suggested Papers:")
    for title in suggestions:
        st.write(f"- {title}")
|
|