hamzaherry committed on
Commit
42eb736
·
verified ·
1 Parent(s): 9e7ddd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -34
app.py CHANGED
@@ -1,34 +1,123 @@
1
def identify_major_research_gaps():
    """Summarize potential inconsistencies between stored document chunks.

    Compares each pair of entries in the module-level ``metadata_store``
    (every entry is read through its ``"chunk"`` key) with two keyword
    heuristics and builds a human-readable summary of the distinct findings.

    Returns:
        str: A bulleted summary of the findings, listed in the order they
        were first observed, or a fixed message when no pair of chunks
        triggers either heuristic.
    """
    methodology_note = (
        "Potential inconsistency in methodologies or interpretations between sections of the document."
    )
    recommendation_note = (
        "Divergence in recommendations or conclusions observed between sections."
    )

    # A dict is used as an ordered set: the bullet order is then stable
    # between runs, which iterating a plain ``set`` does not guarantee.
    findings = {}
    for i, chunk_1 in enumerate(metadata_store):
        # Only pair each entry with later ones to avoid duplicate comparisons.
        for chunk_2 in metadata_store[i + 1:]:
            # Example heuristic to identify key differences
            if "not" in chunk_1["chunk"] and "is" in chunk_2["chunk"]:
                findings[methodology_note] = None
            elif "should" in chunk_1["chunk"] and "must" in chunk_2["chunk"]:
                findings[recommendation_note] = None
        if len(findings) == 2:
            # Both possible findings are already recorded; the remaining
            # pairs cannot change the summary.
            break

    # Summarize key points for display
    if not findings:
        return "No significant research gaps or inconsistencies were identified across the reviewed papers."

    bullet_lines = "\n".join(f"• {difference}" for difference in findings)
    return (
        "The analysis identified several key areas of divergence in the reviewed research papers:\n\n"
        + bullet_lines
        + "\n\n"
        "These findings indicate areas where further investigation or clarification may be beneficial."
    )
29
-
30
# Button event: compute the gap summary first, then render the heading
# followed by the summary text.
if st.button("🔍 Identify Research Gaps"):
    gap_report = identify_major_research_gaps()
    st.write("### 🛠️ Research Gaps Identified:")
    st.write(gap_report)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ from groq import Groq
9
+ import faiss
10
+
11
# Read the Groq API key from the environment instead of embedding it in the
# source: credentials committed to a repository are exposed to every reader.
# NOTE: the key that used to be hard-coded here remains in the commit history
# and should be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or as a secret of the hosting platform) before starting the app."
    )
client = Groq(api_key=GROQ_API_KEY)

# Initialize Embedding Model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS Index
# Ask the model for its vector size so the index cannot drift out of sync
# with the model chosen above (384 for 'all-MiniLM-L6-v2').
embedding_dim = embedding_model.get_sentence_embedding_dimension()
faiss_index = faiss.IndexFlatL2(embedding_dim)

# Store Metadata
# Filled in the same order vectors are added to ``faiss_index``, so a FAISS
# result position can be used directly as an index into this list.
metadata_store = []
23
+
24
+ # Function to extract text from PDFs
25
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as one string.

    Args:
        pdf_file: The PDF source handed to ``PdfReader`` (the app passes
            Streamlit uploaded-file objects).

    Returns:
        str: The text of all pages joined with newlines. Pages that yield
        no text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # ``or ""`` guards against a page whose extraction yields nothing, which
    # would otherwise break string concatenation.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next; a single join also avoids repeated ``+=`` copies.
    return "\n".join(page_texts)
31
+
32
+ # Function to chunk text
33
def chunk_text(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are separated on any whitespace and re-joined with single spaces,
    so the original spacing and line breaks are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        list[str]: The chunks in document order; empty when ``text`` holds
        no words.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Fail loudly: a negative size used to return [] silently, and zero
    # surfaced as an unrelated "range() arg 3 must not be zero" error.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
36
+
37
+ # Function to generate embeddings
38
def generate_embeddings(chunks):
    """Encode text chunks with the shared module-level embedding model.

    Args:
        chunks: The texts to embed.

    Returns:
        The result of ``embedding_model.encode`` for ``chunks``.
    """
    vectors = embedding_model.encode(chunks)
    return vectors
40
+
41
+ # Store embeddings in FAISS index
42
def store_embeddings(embeddings, metadata):
    """Add embedding vectors to the FAISS index and record their metadata.

    Vectors and metadata are appended in lockstep so that the position of a
    vector in ``faiss_index`` equals the position of its entry in
    ``metadata_store``.

    Args:
        embeddings: One vector, or a sequence/array of vectors, one per
            metadata entry.
        metadata: Sequence of metadata entries, in the same order as
            ``embeddings``.

    Raises:
        ValueError: If the number of vectors differs from the number of
            metadata entries.
    """
    # FAISS only accepts float32 matrices.
    vectors = np.asarray(embeddings, dtype=np.float32)

    if vectors.size == 0:
        # Nothing to index (e.g. a PDF without extractable text). Adding an
        # empty array to the index would fail, so return early instead.
        if len(metadata) > 0:
            raise ValueError(
                f"Got {len(metadata)} metadata entries but no embeddings."
            )
        return

    if vectors.ndim == 1:
        # A single vector: promote it to a one-row matrix.
        vectors = vectors.reshape(1, -1)

    if vectors.shape[0] != len(metadata):
        # A mismatch would silently shift every later lookup.
        raise ValueError(
            f"Got {vectors.shape[0]} embeddings but {len(metadata)} metadata entries."
        )

    faiss_index.add(np.ascontiguousarray(vectors))
    metadata_store.extend(metadata)
45
+
46
+ # Retrieve relevant chunks based on query
47
def retrieve_relevant_chunks(query, k=5):
    """Return the stored chunks closest to ``query`` in embedding space.

    Args:
        query: The question or search text.
        k: Maximum number of neighbours to return.

    Returns:
        list[tuple]: ``(metadata_entry, distance)`` pairs in the order FAISS
        returned them. Empty when nothing has been indexed or ``k`` is not
        positive.
    """
    # Never request more neighbours than the index holds; this also covers
    # the empty-index and non-positive-k cases without calling FAISS.
    k = min(k, faiss_index.ntotal)
    if k <= 0:
        return []

    query_embedding = np.asarray(embedding_model.encode([query]), dtype=np.float32)
    distances, indices = faiss_index.search(query_embedding, k)

    # FAISS pads missing neighbours with -1. A bare ``i < len(...)`` check
    # lets -1 through and negative indexing would silently return the last
    # stored chunk, so the lower bound must be checked too.
    return [
        (metadata_store[idx], dist)
        for idx, dist in zip(indices[0], distances[0])
        if 0 <= idx < len(metadata_store)
    ]
54
+
55
+ # Call Groq API to get answers and research gap analysis
56
def ask_groq_api(question, context):
    """Send the context followed by the question to the Groq chat API.

    Args:
        question: The question or instruction for the model.
        context: Text placed before the question in the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    prompt = f"{context}\n\n{question}"
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    first_choice = response.choices[0]
    return first_choice.message.content
62
+
63
+ # Streamlit UI setup
64
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view; everything that needs uploaded content is assumed to live inside
# ``if uploaded_files:`` -- confirm against the real file.
st.title("RAG-Based Research Paper Analyzer")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")

if uploaded_files:
    all_chunks = []
    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)
        chunks = chunk_text(text)
        if not chunks:
            # Nothing to embed (e.g. a scanned PDF); indexing an empty batch
            # would fail, so skip the file and tell the user.
            st.warning(f"No extractable text found in {uploaded_file.name}; skipping it.")
            continue
        embeddings = generate_embeddings(chunks)
        metadata = [{"chunk": chunk, "file_name": uploaded_file.name} for chunk in chunks]
        store_embeddings(embeddings, metadata)
        all_chunks.extend(chunks)

    st.success("Files uploaded and processed successfully!")

    # Button to view topic summaries with an icon.
    # ``icon`` must be an emoji or a ":material/...:" name; bare words such
    # as "book" are rejected by Streamlit.
    if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📖"):
        for chunk in all_chunks[:3]:
            st.write(chunk)

    # User input for query (the invalid ``icon="search"`` argument was
    # dropped so the widget works across Streamlit versions).
    user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")

    if user_question:
        relevant_chunks = retrieve_relevant_chunks(user_question)
        if relevant_chunks:
            context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
            answer = ask_groq_api(user_question, context)
            st.write("**Answer:**", answer)

            # Implement Research Gap Identification based on inconsistencies between papers
            # (``st.subheader`` has no ``icon`` parameter.)
            st.subheader("Research Gap Analysis:")
            research_gap = ask_groq_api("Identify research gaps and unanswered questions based on the following context:", context)
            st.write(f"**Research Gaps Identified:** {research_gap}")
        else:
            st.write("No relevant sections found for your question.")

    # Adding an icon for research gap feature
    if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
        st.write("**Research Gap Analysis:**")
        # TODO: placeholder text -- no analysis of the uploaded content is
        # performed here yet.
        research_gap_analysis = "Based on the analysis of the uploaded papers, several research gaps have been identified, including inconsistent findings in the areas of X, Y, and Z. Further research is needed to clarify these discrepancies."
        st.write(research_gap_analysis)

    # Button to generate scatter plot with a chart icon
    if st.button("Generate Scatter Plot", icon="📊"):
        st.write("Generating scatter plot for methods vs. results...")
        # Example scatter plot (replace with real data)
        x = np.random.rand(10)
        y = np.random.rand(10)
        # Draw on an explicit figure and pass that figure to Streamlit rather
        # than the ``pyplot`` module and its shared global state.
        fig, ax = plt.subplots()
        ax.scatter(x, y)
        ax.set_xlabel("Methods")
        ax.set_ylabel("Results")
        st.pyplot(fig)

    # Text area for annotations (``st.text_area`` has no ``icon`` parameter).
    st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")