Spaces:
Sleeping
Sleeping
File size: 3,092 Bytes
704d3c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import faiss
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import numpy as np
from streamlit_option_menu import option_menu
# Groq API Key
client = Groq(api_key="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")
# Load embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# FAISS Index
dimension = 384
index = faiss.IndexFlatL2(dimension)
# Sidebar navigation
with st.sidebar:
selected = option_menu(
"Research Article Helper",
["Home", "Upload PDF", "Summary", "About"],
icons=["house", "file-earmark-arrow-up", "file-earmark-text", "info-circle"],
menu_icon="cast",
default_index=0,
)
# App title
st.title("π Research Article Helper")
st.write("Interact with research articles by uploading PDFs and asking questions.")
# "Upload PDF" Section
if selected == "Upload PDF":
st.subheader("π Upload a PDF")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file:
pdf_reader = PdfReader(uploaded_file)
text = ""
for page in pdf_reader.pages:
text += page.extract_text()
# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_text(text)
# Encode chunks and store embeddings in FAISS
embeddings = embedding_model.encode(chunks)
for i, embedding in enumerate(embeddings):
index.add(np.array([embedding]))
st.success(f"β
Processed {len(chunks)} chunks and stored embeddings in FAISS.")
st.session_state['chunks'] = chunks
# "Summary" Section
if selected == "Summary":
st.subheader("π Get a Summary of the Uploaded PDF")
if 'chunks' in st.session_state:
if st.button("Generate Summary"):
full_text = " ".join(st.session_state['chunks'][:5]) # Summarizing first 5 chunks as an example
chat_completion = client.chat.completions.create(
messages=[
{
"role": "user",
"content": f"Provide a concise summary of the following text:\n\n{full_text}",
}
],
model="llama3-8b-8192",
)
summary = chat_completion.choices[0].message.content
st.write("### π Summary:")
st.write(summary)
else:
st.info("Click the button above to generate a summary.")
else:
st.warning("Please upload a PDF in the 'Upload PDF' section first.")
# "About" Section
if selected == "About":
st.subheader("βΉοΈ About")
st.write("**Research Article Helper** is a tool to interact with research articles by uploading PDFs. Use it to ask questions, generate summaries, and gain insights from scientific documents.")
st.write("Built using Streamlit, FAISS, and Groq AI.")
|