hamzaherry committed on
Commit
1580fc0
·
verified ·
1 Parent(s): ee979d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -71
app.py CHANGED
@@ -6,8 +6,8 @@ import faiss
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
  from groq import Groq
9
- import faiss
10
 
 
11
# SECURITY(review): this API key is hardcoded and has been committed to a
# public repository — it must be considered compromised and rotated.
# Load it from the environment (e.g. os.environ["GROQ_API_KEY"]) or a
# secrets manager instead of embedding it in source.
GROQ_API_KEY = "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"
# Groq client shared by all completion calls in this module.
client = Groq(api_key=GROQ_API_KEY)
13
 
@@ -21,7 +21,7 @@ faiss_index = faiss.IndexFlatL2(embedding_dim)
21
  # Store Metadata
22
  metadata_store = []
23
 
24
- # Function to extract text from PDFs
25
  def extract_text_from_pdf(pdf_file):
26
  pdf_reader = PdfReader(pdf_file)
27
  text = ""
@@ -29,21 +29,17 @@ def extract_text_from_pdf(pdf_file):
29
  text += page.extract_text()
30
  return text
31
 
32
- # Function to chunk text
33
def chunk_text(text, chunk_size=500):
    """Split *text* into whitespace-delimited chunks of at most *chunk_size* words.

    Returns a list of strings; an empty/whitespace-only input yields [].
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(' '.join(words[start:start + chunk_size]))
    return chunks
36
 
37
- # Function to generate embeddings
38
def generate_embeddings(chunks):
    """Encode text chunks into dense vectors via the module-level embedding model."""
    vectors = embedding_model.encode(chunks)
    return vectors
40
 
41
- # Store embeddings in FAISS index
42
def store_embeddings(embeddings, metadata):
    """Add vectors to the module-level FAISS index and record their metadata.

    The i-th metadata entry corresponds to the i-th added vector, so
    metadata_store stays aligned with the index's internal ids.
    """
    vectors = np.array(embeddings)
    faiss_index.add(vectors)
    metadata_store.extend(metadata)
45
 
46
- # Retrieve relevant chunks based on query
47
  def retrieve_relevant_chunks(query, k=5):
48
  query_embedding = embedding_model.encode([query])
49
  distances, indices = faiss_index.search(query_embedding, k)
@@ -52,7 +48,6 @@ def retrieve_relevant_chunks(query, k=5):
52
  ]
53
  return valid_results
54
 
55
- # Call Groq API to get answers and research gap analysis
56
  def ask_groq_api(question, context):
57
  chat_completion = client.chat.completions.create(
58
  messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
@@ -60,7 +55,23 @@ def ask_groq_api(question, context):
60
  )
61
  return chat_completion.choices[0].message.content
62
 
63
- # Streamlit UI setup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  st.title("RAG-Based Research Paper Analyzer")
65
 
66
  uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
@@ -79,67 +90,6 @@ if uploaded_files:
79
 
80
  st.success("Files uploaded and processed successfully!")
81
 
82
- # Button to view topic summaries with an emoji
83
- if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
84
- for chunk in all_chunks[:3]:
85
- st.write(chunk)
86
-
87
- # User input for query without the icon
88
- user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
89
-
90
- if user_question:
91
- relevant_chunks = retrieve_relevant_chunks(user_question)
92
- if relevant_chunks:
93
- context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
94
- answer = ask_groq_api(user_question, context)
95
- st.write("**Answer:**", answer)
96
-
97
- # Implement Research Gap Identification based on inconsistencies between papers
98
- st.subheader("Research Gap Analysis:", icon="⚠️")
99
- # We will analyze the chunks and context to identify research gaps
100
- research_gap = analyze_research_gaps(all_chunks)
101
- st.write(f"**Research Gaps Identified:** {research_gap}")
102
- else:
103
- st.write("No relevant sections found for your question.")
104
-
105
- # Adding an emoji for research gap feature
106
- if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
107
- st.write("**Research Gap Analysis:**")
108
- # Implementing research gap analysis based on comparing papers
109
  research_gap_analysis = identify_research_gaps(all_chunks)
110
- st.write(research_gap_analysis)
111
-
112
- # Button to generate scatter plot with a chart emoji
113
- if st.button("Generate Scatter Plot", icon="📊"):
114
- st.write("Generating scatter plot for methods vs. results...")
115
- # Example scatter plot (replace with real data)
116
- x = np.random.rand(10)
117
- y = np.random.rand(10)
118
- plt.scatter(x, y)
119
- plt.xlabel("Methods")
120
- plt.ylabel("Results")
121
- st.pyplot(plt)
122
-
123
- # Text area for annotations without the icon
124
- st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")
125
-
126
- # Function to analyze and identify research gaps by comparing chunks from different papers
127
def analyze_research_gaps(chunks):
    """Compare chunk prefixes pairwise and report potential inconsistencies.

    Heuristic: two chunks whose first 100 characters differ are flagged as a
    potential inconsistency. Each unordered pair is reported once.

    Fix: the original iterated all ordered pairs (i, j) and (j, i), emitting
    every finding twice; we now only visit j > i.
    """
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j in range(i + 1, len(chunks)):
            # Prefix comparison only — cheap proxy for "these papers differ".
            if chunk_1[:100] != chunks[j][:100]:
                gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
137
-
138
- # Function to identify unanswered questions based on comparative analysis of multiple papers
139
def identify_research_gaps(chunks):
    """Collect chunks that explicitly mention open or future research questions.

    A chunk qualifies if it contains "future research" or "unanswered
    questions"; matches are joined with newlines, otherwise a fallback
    message is returned.
    """
    markers = ("future research", "unanswered questions")
    hits = [chunk for chunk in chunks if any(marker in chunk for marker in markers)]
    if hits:
        return "\n".join(hits)
    return "No specific unanswered questions found."
 
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
  from groq import Groq
 
9
 
10
# Groq API Key
# SECURITY(review): this API key is hardcoded and has been committed to a
# public repository — it must be considered compromised and rotated.
# Load it from the environment (e.g. os.environ["GROQ_API_KEY"]) or a
# secrets manager instead of embedding it in source.
GROQ_API_KEY = "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"
# Groq client shared by all completion calls in this module.
client = Groq(api_key=GROQ_API_KEY)
13
 
 
21
  # Store Metadata
22
  metadata_store = []
23
 
24
+ # Function Definitions
25
  def extract_text_from_pdf(pdf_file):
26
  pdf_reader = PdfReader(pdf_file)
27
  text = ""
 
29
  text += page.extract_text()
30
  return text
31
 
 
32
def chunk_text(text, chunk_size=500):
    """Break *text* into word-based chunks no longer than *chunk_size* words each."""
    words = text.split()
    return [
        ' '.join(words[idx:idx + chunk_size])
        for idx in range(0, len(words), chunk_size)
    ]
35
 
 
36
def generate_embeddings(chunks):
    """Return dense embedding vectors for *chunks* using the shared model."""
    encoded = embedding_model.encode(chunks)
    return encoded
38
 
 
39
def store_embeddings(embeddings, metadata):
    """Push new vectors into the FAISS index and keep metadata_store in sync."""
    as_array = np.array(embeddings)
    faiss_index.add(as_array)
    metadata_store.extend(metadata)
42
 
 
43
  def retrieve_relevant_chunks(query, k=5):
44
  query_embedding = embedding_model.encode([query])
45
  distances, indices = faiss_index.search(query_embedding, k)
 
48
  ]
49
  return valid_results
50
 
 
51
  def ask_groq_api(question, context):
52
  chat_completion = client.chat.completions.create(
53
  messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
 
55
  )
56
  return chat_completion.choices[0].message.content
57
 
58
def analyze_research_gaps(chunks):
    """Compare chunk prefixes pairwise and report potential inconsistencies.

    Heuristic: two chunks whose first 100 characters differ are flagged as a
    potential inconsistency. Each unordered pair is reported once.

    Fix: the original iterated all ordered pairs (i, j) and (j, i), emitting
    every finding twice; we now only visit j > i.
    """
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j in range(i + 1, len(chunks)):
            # Prefix comparison only — cheap proxy for "these papers differ".
            if chunk_1[:100] != chunks[j][:100]:
                gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
66
+
67
def identify_research_gaps(chunks):
    """Return newline-joined chunks mentioning future/unanswered research.

    Falls back to a fixed message when no chunk matches either keyword.
    """
    found = [c for c in chunks if "future research" in c or "unanswered questions" in c]
    return "\n".join(found) if found else "No specific unanswered questions found."
73
+
74
+ # Main Streamlit App Logic
75
  st.title("RAG-Based Research Paper Analyzer")
76
 
77
  uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
 
90
 
91
  st.success("Files uploaded and processed successfully!")
92
 
93
+ if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  research_gap_analysis = identify_research_gaps(all_chunks)
95
+ st.write(f"**Research Gaps Identified:** {research_gap_analysis}")