Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,34 +1,123 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from PyPDF2 import PdfReader
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
import faiss
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import numpy as np
|
8 |
+
from groq import Groq
|
9 |
+
import faiss
|
10 |
+
|
11 |
+
# Read the Groq credential from the environment (e.g. a Hugging Face Space
# secret) instead of committing it to source control.
# SECURITY: an API key was previously hard-coded on this line; treat that key
# as leaked and revoke/rotate it in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail with an actionable message instead of an opaque client error later.
    st.error("GROQ_API_KEY is not set. Add it as an environment variable or Space secret.")
    st.stop()

# Shared Groq client used by ask_groq_api().
client = Groq(api_key=GROQ_API_KEY)
|
13 |
+
|
14 |
+
# Initialize Embedding Model
# The same model encodes both document chunks and user queries.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS Index
# Derive the vector size from the loaded model so the index cannot drift out
# of sync if the model name above is changed; 384 is the dimensionality of
# 'all-MiniLM-L6-v2' and is kept as a fallback if the model does not report one.
embedding_dim = embedding_model.get_sentence_embedding_dimension() or 384
faiss_index = faiss.IndexFlatL2(embedding_dim)

# Store Metadata
# metadata_store[i] describes the vector stored at row i of faiss_index;
# store_embeddings() appends to both together.
metadata_store = []
|
23 |
+
|
24 |
+
# Function to extract text from PDFs
|
25 |
+
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: A path or binary file-like object that ``PdfReader`` accepts
            (the UI passes Streamlit uploaded-file objects).

    Returns:
        A single string with the text of all pages, in page order. Pages that
        yield no text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # `or ""` guards against pages for which extraction yields nothing
    # (presumably image-only/scanned pages -- confirm against the PyPDF2
    # version in use); join avoids repeated string re-allocation on big PDFs.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
31 |
+
|
32 |
+
# Function to chunk text
|
33 |
+
def chunk_text(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are obtained with ``str.split()`` (any whitespace run is a
    separator) and re-joined with single spaces, so original whitespace and
    line breaks are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings in document order; empty if ``text`` contains
        no words.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A zero step makes range() fail with an unrelated-looking message, and a
    # negative step silently yields no chunks at all -- reject both up front.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
|
36 |
+
|
37 |
+
# Function to generate embeddings
|
38 |
+
def generate_embeddings(chunks):
    """Encode text chunks with the module-level ``embedding_model``.

    Args:
        chunks: The chunk strings to embed.

    Returns:
        Whatever ``embedding_model.encode`` returns for ``chunks`` (one
        embedding per chunk).
    """
    vectors = embedding_model.encode(chunks)
    return vectors
|
40 |
+
|
41 |
+
# Store embeddings in FAISS index
|
42 |
+
def store_embeddings(embeddings, metadata):
    """Add embeddings to the FAISS index and record their metadata.

    Row ``i`` of ``embeddings`` must correspond to item ``i`` of ``metadata``;
    both are appended so that positions in ``faiss_index`` and
    ``metadata_store`` stay aligned.

    Args:
        embeddings: Array-like of shape (n, dim), or a single 1-D vector.
        metadata: Iterable of ``n`` metadata entries, one per embedding.

    Raises:
        ValueError: If the number of embeddings and metadata entries differ,
            or if ``embeddings`` is not 1-D/2-D.
    """
    entries = list(metadata)
    # FAISS expects C-contiguous float32 input.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

    if vectors.size == 0:
        if entries:
            raise ValueError(
                f"got {len(entries)} metadata entries but no embeddings"
            )
        # Nothing to index (e.g. a document with no extractable text).
        return

    if vectors.ndim == 1:
        # A single vector: promote to a one-row matrix.
        vectors = vectors.reshape(1, -1)
    if vectors.ndim != 2:
        raise ValueError(f"embeddings must be 2-D, got {vectors.ndim}-D")

    # A mismatch would shift every later lookup in metadata_store by the
    # difference, so refuse it before mutating either structure.
    if vectors.shape[0] != len(entries):
        raise ValueError(
            f"got {vectors.shape[0]} embeddings but {len(entries)} metadata entries"
        )

    faiss_index.add(vectors)
    metadata_store.extend(entries)
|
45 |
+
|
46 |
+
# Retrieve relevant chunks based on query
|
47 |
+
def retrieve_relevant_chunks(query, k=5):
    """Return the stored chunks nearest to ``query`` in embedding space.

    Args:
        query: The user's question text.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(metadata, distance)`` tuples in the order FAISS returned
        them, where ``metadata`` is the entry from ``metadata_store`` and
        ``distance`` is the value FAISS reported for it. The list is shorter
        than ``k`` when the index holds fewer vectors, and empty when the
        index is empty.
    """
    if faiss_index.ntotal == 0:
        return []

    query_embedding = np.asarray(embedding_model.encode([query]), dtype=np.float32)
    distances, indices = faiss_index.search(query_embedding, k)

    store_size = len(metadata_store)
    # FAISS pads missing neighbours with label -1. Without the lower bound,
    # -1 would pass the size check and index the *last* metadata entry,
    # producing duplicate bogus hits.
    return [
        (metadata_store[idx], distances[0][rank])
        for rank, idx in enumerate(indices[0])
        if 0 <= idx < store_size
    ]
|
54 |
+
|
55 |
+
# Call Groq API to get answers and research gap analysis
|
56 |
+
def ask_groq_api(question, context, model="llama3-8b-8192"):
    """Send a question plus supporting context to the Groq chat API.

    The prompt is a single user message consisting of ``context``, a blank
    line, then ``question``.

    Args:
        question: The question or instruction for the model.
        context: Supporting text placed before the question.
        model: Groq model identifier. Defaults to the value the app has
            always used, so existing callers are unaffected.

    Returns:
        The text content of the first completion choice.
    """
    prompt = f"{context}\n\n{question}"
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return chat_completion.choices[0].message.content
|
62 |
+
|
63 |
+
# Streamlit UI setup
# NOTE(review): block nesting below was reconstructed from a flattened diff
# view; everything after the upload step is assumed to live inside
# `if uploaded_files:` because it reads `all_chunks` -- confirm.
st.title("RAG-Based Research Paper Analyzer")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")

if uploaded_files:
    all_chunks = []
    all_metadata = []
    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)
        chunks = chunk_text(text)
        if not chunks:
            # Nothing to embed or index; passing an empty batch on would
            # fail inside the embedding/index step.
            st.warning(f"No extractable text found in {uploaded_file.name}; skipping.")
            continue
        embeddings = generate_embeddings(chunks)
        metadata = [{"chunk": chunk, "file_name": uploaded_file.name} for chunk in chunks]
        store_embeddings(embeddings, metadata)
        all_chunks.extend(chunks)
        all_metadata.extend(metadata)

    if all_chunks:
        st.success("Files uploaded and processed successfully!")

    # Button to view topic summaries.
    # Streamlit icons must be a single emoji or a ":material/<name>:"
    # shortcode; bare names such as "book" are rejected.
    if st.button(
        "View Topic Summaries",
        help="Click to view a brief summary of the uploaded papers",
        icon=":material/menu_book:",
    ):
        # Currently shows the first three raw chunks rather than a summary.
        for chunk in all_chunks[:3]:
            st.write(chunk)

    # User input for query. The previous icon="search" argument was dropped:
    # it is not a valid icon value (see above).
    user_question = st.text_input(
        "Ask a question about the uploaded papers:",
        help="Ask about specific research details",
    )

    if user_question:
        relevant_chunks = retrieve_relevant_chunks(user_question)
        if relevant_chunks:
            context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
            answer = ask_groq_api(user_question, context)
            st.write("**Answer:**", answer)

            # Research gap identification over the same retrieved context.
            # NOTE(review): `icon` does not appear to be a documented
            # parameter of st.subheader, so it was removed -- confirm.
            st.subheader("Research Gap Analysis:")
            research_gap = ask_groq_api(
                "Identify research gaps and unanswered questions based on the following context:",
                context,
            )
            st.write(f"**Research Gaps Identified:** {research_gap}")
        else:
            st.write("No relevant sections found for your question.")

    # Stand-alone research gap button.
    if st.button(
        "Identify Research Gaps",
        help="Find unanswered questions or areas where research is lacking",
        icon=":material/warning:",
    ):
        st.write("**Research Gap Analysis:**")
        # NOTE: placeholder text -- no analysis of the uploaded papers is
        # performed here yet.
        research_gap_analysis = "Based on the analysis of the uploaded papers, several research gaps have been identified, including inconsistent findings in the areas of X, Y, and Z. Further research is needed to clarify these discrepancies."
        st.write(research_gap_analysis)

    # Button to generate scatter plot.
    if st.button("Generate Scatter Plot", icon=":material/bar_chart:"):
        st.write("Generating scatter plot for methods vs. results...")
        # Example scatter plot (replace with real data)
        x = np.random.rand(10)
        y = np.random.rand(10)
        # Draw on an explicit Figure and hand that to Streamlit instead of
        # passing the pyplot module, which relied on shared global state.
        fig, ax = plt.subplots()
        ax.scatter(x, y)
        ax.set_xlabel("Methods")
        ax.set_ylabel("Results")
        st.pyplot(fig)

    # Text area for annotations.
    # NOTE(review): `icon` does not appear to be a documented parameter of
    # st.text_area, so it was removed -- confirm.
    st.text_area(
        "Annotate Your Insights:",
        height=100,
        key="annotations",
        help="Add your thoughts or comments here",
    )
|