Spaces:

hamzaherry
/

Research.Paper.Analyzer

Sleeping

App Files Files Community

hamzaherry committed on Jan 5

Commit

42eb736

verified ·

1 Parent(s): 9e7ddd4

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -34

app.py CHANGED Viewed

@@ -1,34 +1,123 @@
-def identify_major_research_gaps():
-    """Build a display summary from keyword heuristics over pairs of stored chunks.
-
-    Reads the free name ``metadata_store`` (not defined in this block) and
-    compares each unordered pair of its entries via their ``"chunk"`` values.
-
-    Returns:
-        A bulleted summary string when at least one heuristic matched,
-        otherwise a fixed message stating that nothing was identified.
-    """
-    # Extract high-level contradictions or differences
-    key_differences = []
-    for i, chunk_1 in enumerate(metadata_store):
-        for j, chunk_2 in enumerate(metadata_store):
-            if i >= j:  # Avoid duplicate comparisons
-                continue
-            # Example heuristic to identify key differences
-            # NOTE(review): if the "chunk" values are strings these are plain
-            # substring tests, so "is" also matches inside words such as
-            # "this" — confirm this is intended.
-            if "not" in chunk_1["chunk"] and "is" in chunk_2["chunk"]:
-                key_differences.append(
-                    f"Potential inconsistency in methodologies or interpretations between sections of the document."
-                )
-            elif "should" in chunk_1["chunk"] and "must" in chunk_2["chunk"]:
-                key_differences.append(
-                    f"Divergence in recommendations or conclusions observed between sections."
-                )
-    # Summarize key points for display
-    if key_differences:
-        # set() removes repeats; only two distinct messages are ever appended,
-        # so the summary contains at most two bullets.
-        summary = (
-            f"The analysis identified several key areas of divergence in the reviewed research papers:\n\n"
-            + "\n".join(f"• {difference}" for difference in set(key_differences))
-            + "\n\n"
-            "These findings indicate areas where further investigation or clarification may be beneficial."
-        )
-        return summary
-    else:
-        return "No significant research gaps or inconsistencies were identified across the reviewed papers."
-# Replace logic inside the button event
-# When the button reports a press, compute the summary string and write it
-# below a heading.
-if st.button("🔍 Identify Research Gaps"):
-    research_gap_summary = identify_major_research_gaps()
-    st.write("### 🛠️ Research Gaps Identified:")
-    st.write(research_gap_summary)

+import os
+import streamlit as st
+from PyPDF2 import PdfReader
+from sentence_transformers import SentenceTransformer
+import faiss
+import matplotlib.pyplot as plt
+import numpy as np
+from groq import Groq
+import faiss
+# Read the Groq credential from the environment (for example a Space secret)
+# instead of committing it to the repository. Any key that was ever hard-coded
+# in this file is public and must be revoked and replaced.
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+if not GROQ_API_KEY:
+    # Fail with a readable message rather than an opaque client error later.
+    st.error("GROQ_API_KEY is not set. Add it as an environment variable or Space secret.")
+    st.stop()
+client = Groq(api_key=GROQ_API_KEY)
+# Initialize Embedding Model
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Initialize FAISS Index
+embedding_dim = 384  # Dimensionality of 'all-MiniLM-L6-v2'
+faiss_index = faiss.IndexFlatL2(embedding_dim)
+# Store Metadata: entry i describes the vector stored at position i of faiss_index
+metadata_store = []
+# Function to extract text from PDFs
+def extract_text_from_pdf(pdf_file):
+    """Return the text of every page in ``pdf_file`` as one string.
+
+    Args:
+        pdf_file: Anything ``PdfReader`` accepts (the app passes an uploaded
+            file object).
+
+    Returns:
+        The page texts joined with newlines. Pages that yield no text
+        contribute an empty string.
+    """
+    pdf_reader = PdfReader(pdf_file)
+    # Join with a newline so the last word of one page is not fused with the
+    # first word of the next; `or ""` tolerates pages whose extraction yields
+    # nothing.
+    return "\n".join((page.extract_text() or "") for page in pdf_reader.pages)
+# Word-based chunking helper
+def chunk_text(text, chunk_size=500):
+    """Split ``text`` on whitespace and regroup it into chunks.
+
+    Each chunk holds up to ``chunk_size`` words joined by single spaces; the
+    final chunk may be shorter. Text with no words yields an empty list.
+    """
+    tokens = text.split()
+    pieces = []
+    for start in range(0, len(tokens), chunk_size):
+        window = tokens[start:start + chunk_size]
+        pieces.append(' '.join(window))
+    return pieces
+# Embedding helper
+def generate_embeddings(chunks):
+    """Encode ``chunks`` with the module-level ``embedding_model`` and return the result."""
+    vectors = embedding_model.encode(chunks)
+    return vectors
+# Store embeddings in FAISS index
+def store_embeddings(embeddings, metadata):
+    """Add ``embeddings`` to ``faiss_index`` and their ``metadata`` to ``metadata_store``.
+
+    Args:
+        embeddings: One vector per chunk (array-like).
+        metadata: One metadata entry per vector, in the same order.
+
+    Raises:
+        ValueError: If the number of vectors and metadata entries differ.
+            Storing them anyway would break the position-based mapping that
+            retrieval relies on.
+    """
+    # FAISS works on float32 matrices; coerce once here.
+    vectors = np.asarray(embeddings, dtype=np.float32)
+    entries = list(metadata)
+    if len(vectors) != len(entries):
+        raise ValueError(
+            f"Got {len(vectors)} embeddings but {len(entries)} metadata entries"
+        )
+    if not entries:
+        # Nothing to store (e.g. a document with no extractable text); an
+        # empty array has no second dimension for the index to accept.
+        return
+    faiss_index.add(vectors)
+    metadata_store.extend(entries)
+# Retrieve relevant chunks based on query
+def retrieve_relevant_chunks(query, k=5):
+    """Return the stored chunks nearest to ``query``.
+
+    Args:
+        query: Text to embed and search for.
+        k: Maximum number of neighbours to request from the index.
+
+    Returns:
+        A list of ``(metadata_entry, distance)`` tuples in the order returned
+        by the index; it holds fewer than ``k`` items when the index has
+        fewer vectors.
+    """
+    query_embedding = embedding_model.encode([query])
+    distances, indices = faiss_index.search(query_embedding, k)
+    # FAISS fills missing neighbours with index -1. Requiring a non-negative
+    # index keeps -1 from aliasing the last metadata entry.
+    valid_results = [
+        (metadata_store[i], distances[0][j])
+        for j, i in enumerate(indices[0])
+        if 0 <= i < len(metadata_store)
+    ]
+    return valid_results
+# Groq chat-completion helper used for answers and research gap analysis
+def ask_groq_api(question, context):
+    """Send ``context`` followed by ``question`` as one user message.
+
+    Returns the message content of the first choice in the response.
+    """
+    prompt = f"{context}\n\n{question}"
+    user_message = {"role": "user", "content": prompt}
+    response = client.chat.completions.create(
+        messages=[user_message],
+        model="llama3-8b-8192",
+    )
+    first_choice = response.choices[0]
+    return first_choice.message.content
+# Streamlit UI setup
+# Widget icons are emoji because Streamlit rejects plain names such as "book";
+# widgets that are not known to accept an `icon` argument do not receive one.
+st.title("RAG-Based Research Paper Analyzer")
+uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
+if uploaded_files:
+    # ask_groq_api places the context before the question, so the prompt
+    # refers to the context "above".
+    gap_prompt = "Identify research gaps and unanswered questions based on the context above."
+    all_chunks = []
+    all_metadata = []
+    for uploaded_file in uploaded_files:
+        text = extract_text_from_pdf(uploaded_file)
+        chunks = chunk_text(text)
+        if not chunks:
+            # Scanned or image-only PDFs produce no text; there is nothing to
+            # embed or index for them.
+            st.warning(f"No extractable text found in {uploaded_file.name}; skipping it.")
+            continue
+        embeddings = generate_embeddings(chunks)
+        metadata = [{"chunk": chunk, "file_name": uploaded_file.name} for chunk in chunks]
+        store_embeddings(embeddings, metadata)
+        all_chunks.extend(chunks)
+        all_metadata.extend(metadata)
+    if all_chunks:
+        st.success("Files uploaded and processed successfully!")
+    # Button to view topic summaries with an icon
+    if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
+        for chunk in all_chunks[:3]:
+            st.write(chunk)
+    # User input for query
+    user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
+    if user_question:
+        relevant_chunks = retrieve_relevant_chunks(user_question)
+        if relevant_chunks:
+            context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
+            answer = ask_groq_api(user_question, context)
+            st.write("**Answer:**", answer)
+            # Research gap identification over the same retrieved context
+            st.subheader("Research Gap Analysis:")
+            research_gap = ask_groq_api(gap_prompt, context)
+            st.write(f"**Research Gaps Identified:** {research_gap}")
+        else:
+            st.write("No relevant sections found for your question.")
+    # Research gap feature over the uploaded papers
+    if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="🔍"):
+        st.write("**Research Gap Analysis:**")
+        if all_chunks:
+            # Analyze actual content instead of printing a canned sentence;
+            # the first few chunks keep the prompt within the model's window.
+            gap_context = "\n\n".join(all_chunks[:5])
+            research_gap_analysis = ask_groq_api(gap_prompt, gap_context)
+            st.write(research_gap_analysis)
+        else:
+            st.write("No text is available to analyze.")
+    # Button to generate scatter plot with a chart icon
+    if st.button("Generate Scatter Plot", icon="📊"):
+        st.write("Generating scatter plot for methods vs. results...")
+        # Example scatter plot (replace with real data)
+        x = np.random.rand(10)
+        y = np.random.rand(10)
+        # Draw on a dedicated figure so repeated clicks do not share global
+        # pyplot state, and close it once rendered.
+        fig, ax = plt.subplots()
+        ax.scatter(x, y)
+        ax.set_xlabel("Methods")
+        ax.set_ylabel("Results")
+        st.pyplot(fig)
+        plt.close(fig)
+    # Text area for annotations
+    st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")