Spaces:

hamzaherry
/

Research.Paper.Analyzer

Sleeping

App Files Files Community

hamzaherry committed on Jan 5

Commit

864d264

verified ·

1 Parent(s): 1580fc0

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -21

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ import faiss
 import matplotlib.pyplot as plt
 import numpy as np
 from groq import Groq
-# Groq API Key
 # The Groq API key is read from the environment instead of being hard-coded,
 # so the secret is never stored in the source file or its commit history.
 import os
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # None when the variable is unset
 client = Groq(api_key=GROQ_API_KEY)
@@ -21,7 +21,16 @@ faiss_index = faiss.IndexFlatL2(embedding_dim)
 # Store Metadata
 metadata_store = []
-# Function Definitions
 def extract_text_from_pdf(pdf_file):
     pdf_reader = PdfReader(pdf_file)
     text = ""
@@ -29,17 +38,21 @@ def extract_text_from_pdf(pdf_file):
         text += page.extract_text()
     return text
 def chunk_text(text, chunk_size=500):
     """Split *text* on whitespace and regroup the words into chunks.

     Each returned string holds up to ``chunk_size`` words joined by single
     spaces; the last chunk may be shorter. Empty text yields an empty list.
     """
     tokens = text.split()
     pieces = []
     for start in range(0, len(tokens), chunk_size):
         window = tokens[start:start + chunk_size]
         pieces.append(' '.join(window))
     return pieces
 def generate_embeddings(chunks):
     """Encode *chunks* with the ``embedding_model`` defined outside this function."""
     vectors = embedding_model.encode(chunks)
     return vectors
 def store_embeddings(embeddings, metadata):
     """Add *embeddings* to ``faiss_index`` and append *metadata* to ``metadata_store``."""
     vectors = np.array(embeddings)
     faiss_index.add(vectors)
     metadata_store.extend(metadata)
 def retrieve_relevant_chunks(query, k=5):
     query_embedding = embedding_model.encode([query])
     distances, indices = faiss_index.search(query_embedding, k)
@@ -48,6 +61,7 @@ def retrieve_relevant_chunks(query, k=5):
     ]
     return valid_results
 def ask_groq_api(question, context):
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
@@ -55,23 +69,7 @@ def ask_groq_api(question, context):
     )
     return chat_completion.choices[0].message.content
-def analyze_research_gaps(chunks):  # reports ordered pairs of chunk positions whose first 100 characters differ
-    gaps = []
-    for i, chunk_1 in enumerate(chunks):
-        for j, chunk_2 in enumerate(chunks):
-            if i != j:  # never compare a chunk with itself; (i, j) and (j, i) are both visited
-                if chunk_1[:100] != chunk_2[:100]:  # Example heuristic: only the leading 100 characters are compared
-                    gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
-    return "\n".join(gaps) if gaps else "No major inconsistencies found."  # fallback sentence when no pair differs
-def identify_research_gaps(chunks):  # returns the chunks that mention either key phrase, newline-joined
-    unanswered_questions = []
-    for chunk in chunks:
-        if "future research" in chunk or "unanswered questions" in chunk:  # plain substring test, case-sensitive
-            unanswered_questions.append(chunk)
-    return "\n".join(unanswered_questions) if unanswered_questions else "No specific unanswered questions found."  # fallback when nothing matched
-# Main Streamlit App Logic
 st.title("RAG-Based Research Paper Analyzer")
 uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
@@ -90,6 +88,58 @@ if uploaded_files:
     st.success("Files uploaded and processed successfully!")
-    if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking"):
         research_gap_analysis = identify_research_gaps(all_chunks)
-        st.write(f"**Research Gaps Identified:** {research_gap_analysis}")

 import matplotlib.pyplot as plt
 import numpy as np
 from groq import Groq
+import faiss
 # The Groq API key is read from the environment instead of being hard-coded,
 # so the secret is never stored in the source file or its commit history.
 import os
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # None when the variable is unset
 client = Groq(api_key=GROQ_API_KEY)
 # Store Metadata
 metadata_store = []
+# Keyword filter for research gaps: returns the chunks that mention "future research" or "unanswered questions", newline-joined. No comparison between papers is performed here.
+def identify_research_gaps(chunks):  # matching chunks must be strings, since they are passed to str.join
+    unanswered_questions = []
+    # Plain substring test, so matching is case-sensitive (e.g. "Future research" is not caught).
+    for chunk in chunks:
+        if "future research" in chunk or "unanswered questions" in chunk:
+            unanswered_questions.append(chunk)  # the whole matching chunk is kept
+    return "\n".join(unanswered_questions) if unanswered_questions else "No specific unanswered questions found."  # fallback message when nothing matched
+# Function to extract text from PDFs
 def extract_text_from_pdf(pdf_file):
     pdf_reader = PdfReader(pdf_file)
     text = ""
         text += page.extract_text()
     return text
+# Function to chunk text
 def chunk_text(text, chunk_size=500):
     """Split *text* on whitespace and regroup the words into chunks.

     Each returned string holds up to ``chunk_size`` words joined by single
     spaces; the last chunk may be shorter. Empty text yields an empty list.
     """
     tokens = text.split()
     pieces = []
     for start in range(0, len(tokens), chunk_size):
         window = tokens[start:start + chunk_size]
         pieces.append(' '.join(window))
     return pieces
+# Function to generate embeddings
 def generate_embeddings(chunks):
     """Encode *chunks* with the ``embedding_model`` defined outside this function."""
     vectors = embedding_model.encode(chunks)
     return vectors
+# Store embeddings in FAISS index
 def store_embeddings(embeddings, metadata):
     """Add *embeddings* to ``faiss_index`` and append *metadata* to ``metadata_store``."""
     vectors = np.array(embeddings)
     faiss_index.add(vectors)
     metadata_store.extend(metadata)
+# Retrieve relevant chunks based on query
 def retrieve_relevant_chunks(query, k=5):
     query_embedding = embedding_model.encode([query])
     distances, indices = faiss_index.search(query_embedding, k)
     ]
     return valid_results
+# Call Groq API to get answers and research gap analysis
 def ask_groq_api(question, context):
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
     )
     return chat_completion.choices[0].message.content
+# Streamlit UI setup
 st.title("RAG-Based Research Paper Analyzer")
 uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
     st.success("Files uploaded and processed successfully!")
+    # Button to view topic summaries with an emoji
+    if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
+        for chunk in all_chunks[:3]:
+            st.write(chunk)
+    # User input for query without the icon
+    user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
+    if user_question:
+        relevant_chunks = retrieve_relevant_chunks(user_question)
+        if relevant_chunks:
+            context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
+            answer = ask_groq_api(user_question, context)
+            st.write("**Answer:**", answer)
+            # Implement Research Gap Identification based on inconsistencies between papers
+            st.subheader("Research Gap Analysis:", icon="⚠️")
+            # We will analyze the chunks and context to identify research gaps
+            research_gap = analyze_research_gaps(all_chunks)
+            st.write(f"**Research Gaps Identified:** {research_gap}")
+        else:
+            st.write("No relevant sections found for your question.")
+    # Adding an emoji for research gap feature
+    if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
+        st.write("**Research Gap Analysis:**")
+        # Implementing research gap analysis based on comparing papers
         research_gap_analysis = identify_research_gaps(all_chunks)
+        st.write(research_gap_analysis)
+    # Button to generate scatter plot with a chart emoji
+    if st.button("Generate Scatter Plot", icon="📊"):
+        st.write("Generating scatter plot for methods vs. results...")
+        # Example scatter plot (replace with real data)
+        x = np.random.rand(10)
+        y = np.random.rand(10)
+        plt.scatter(x, y)
+        plt.xlabel("Methods")
+        plt.ylabel("Results")
+        st.pyplot(plt)
+    # Text area for annotations without the icon
+    st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")
+# Pairwise prefix comparison: reports every ordered pair of distinct chunk positions whose first 100 characters differ. NOTE(review): in this listing the definition sits below the UI code that calls analyze_research_gaps - confirm it is defined before that call runs.
+def analyze_research_gaps(chunks):
+    # Returns one message line per differing pair, or a fallback sentence when no pair differs.
+    gaps = []
+    for i, chunk_1 in enumerate(chunks):
+        for j, chunk_2 in enumerate(chunks):
+            if i != j:
+                # Both (i, j) and (j, i) are visited, so each differing pair is reported twice.
+                if chunk_1[:100] != chunk_2[:100]:  # only the leading 100 characters are compared
+                    gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
+    return "\n".join(gaps) if gaps else "No major inconsistencies found."