import os

import faiss
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Read the Groq API key from the environment rather than hardcoding a secret.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)
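# Example (shell), assuming this file is saved as app.py:
#   export GROQ_API_KEY="gsk_..." && streamlit run app.py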
# Initialize Embedding Model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Initialize FAISS Index
embedding_dim = 384 # Dimensionality of 'all-MiniLM-L6-v2'
faiss_index = faiss.IndexFlatL2(embedding_dim)
# Store Metadata
metadata_store = []
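# Invariant: metadata_store[i] describes the vector with id i in faiss_index,
# so the two must always be appended to in lockstep.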
# Flag chunks that explicitly mention open questions or limitations.
def identify_research_gaps(chunks):
    unanswered_questions = []
    # Keywords that typically signal a research gap in academic writing.
    keywords = [
        "future research", "unanswered questions", "limitations", "need for more research",
        "open questions", "further investigation", "research gap", "areas for improvement",
        "unknowns", "unexplored", "unresolved issues",
    ]
    # Keep every chunk that mentions at least one keyword.
    for chunk in chunks:
        if any(keyword in chunk.lower() for keyword in keywords):
            unanswered_questions.append(chunk)
    return "\n".join(unanswered_questions) if unanswered_questions else "No specific unanswered questions found."
# Extract the plain text of every page of an uploaded PDF.
def extract_text_from_pdf(pdf_file):
    pdf_reader = PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages.
        text += page.extract_text() or ""
    return text
# Split text into fixed-size word chunks for embedding.
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

# Encode chunks into dense vectors with the sentence-transformer model.
def generate_embeddings(chunks):
    return embedding_model.encode(chunks)
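# Example: chunk_text("a b c", chunk_size=2) -> ["a b", "c"], and encoding
# those two chunks yields a (2, 384) float32 array from this model.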
# Add embeddings to the FAISS index and record their metadata in lockstep.
def store_embeddings(embeddings, metadata):
    # FAISS expects a contiguous float32 matrix.
    faiss_index.add(np.array(embeddings, dtype="float32"))
    metadata_store.extend(metadata)

# Retrieve the k chunks whose embeddings are closest to the query.
def retrieve_relevant_chunks(query, k=5):
    query_embedding = np.array(embedding_model.encode([query]), dtype="float32")
    distances, indices = faiss_index.search(query_embedding, k)
    # FAISS pads missing neighbors with index -1, so filter those out.
    valid_results = [
        (metadata_store[i], distances[0][j])
        for j, i in enumerate(indices[0])
        if 0 <= i < len(metadata_store)
    ]
    return valid_results
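# IndexFlatL2 returns neighbors sorted by ascending L2 distance, so the
# first element of valid_results is the chunk closest to the query.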
# Send the question plus retrieved context to the Groq chat API.
def ask_groq_api(question, context):
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# Compare chunks pairwise to flag potential inconsistencies between papers.
# Defined before the Streamlit UI below, which calls it from a button handler;
# Streamlit executes this script top to bottom, so a definition placed after
# the call site would raise a NameError.
def analyze_research_gaps(chunks):
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j, chunk_2 in enumerate(chunks):
            if i != j:
                # Crude heuristic: treat differing openings (first 100
                # characters) as a potential inconsistency between chunks.
                if chunk_1[:100] != chunk_2[:100]:
                    gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
# Streamlit UI setup
st.title("RAG-Based Research Paper Analyzer")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")

if uploaded_files:
    all_chunks = []
    all_metadata = []
    for uploaded_file in uploaded_files:
        # Extract, chunk, embed, and index each uploaded paper.
        text = extract_text_from_pdf(uploaded_file)
        chunks = chunk_text(text)
        embeddings = generate_embeddings(chunks)
        metadata = [{"chunk": chunk, "file_name": uploaded_file.name} for chunk in chunks]
        store_embeddings(embeddings, metadata)
        all_chunks.extend(chunks)
        all_metadata.extend(metadata)
    st.success("Files uploaded and processed successfully!")
    # Button to view a brief summary of the uploaded papers.
    if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
        # Show the first few chunks as a rough preview.
        for chunk in all_chunks[:3]:
            st.write(chunk)
    # Free-form question input about the uploaded papers (no icon here).
    user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
    if user_question:
        relevant_chunks = retrieve_relevant_chunks(user_question)
        if relevant_chunks:
            # Concatenate the retrieved chunks into one context string for the LLM.
            context = "\n\n".join([meta["chunk"] for meta, _ in relevant_chunks])
            answer = ask_groq_api(user_question, context)
            st.write("**Answer:**", answer)
            # Research gap identification based on inconsistencies between papers.
            # st.subheader has no icon parameter, so the emoji goes in the text.
            st.subheader("⚠️ Research Gap Analysis")
            research_gap = analyze_research_gaps(all_chunks)
            st.write(f"**Research Gaps Identified:** {research_gap}")
        else:
            st.write("No relevant sections found for your question.")
    # Button for keyword-based research gap detection.
    if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
        st.write("**Research Gap Analysis:**")
        research_gap_analysis = identify_research_gaps(all_chunks)
        st.write(research_gap_analysis)
    # Button to generate a scatter plot, with a chart emoji.
    if st.button("Generate Scatter Plot", icon="📊"):
        st.write("Generating scatter plot for methods vs. results...")
        # Placeholder data; replace with real method/result metrics.
        x = np.random.rand(10)
        y = np.random.rand(10)
        # Draw on a dedicated figure so reruns do not accumulate points.
        fig, ax = plt.subplots()
        ax.scatter(x, y)
        ax.set_xlabel("Methods")
        ax.set_ylabel("Results")
        st.pyplot(fig)
    # Text area for user annotations (no icon here).
    st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")